1 // FIXME: add classList. it is a live list and removes whitespace and duplicates when you use it. 2 // FIXME: xml namespace support??? 3 // FIXME: https://developer.mozilla.org/en-US/docs/Web/API/Element/insertAdjacentHTML 4 // FIXME: parentElement is parentNode that skips DocumentFragment etc but will be hard to work in with my compatibility... 5 6 // FIXME: the scriptable list is quite arbitrary 7 8 9 // xml entity references?! 10 11 /++ 12 This is an html DOM implementation, started with cloning 13 what the browser offers in Javascript, but going well beyond 14 it in convenience. 15 16 If you can do it in Javascript, you can probably do it with 17 this module, and much more. 18 19 --- 20 import arsd.dom; 21 22 void main() { 23 auto document = new Document("<html><p>paragraph</p></html>"); 24 writeln(document.querySelector("p")); 25 document.root.innerHTML = "<p>hey</p>"; 26 writeln(document); 27 } 28 --- 29 30 BTW: this file optionally depends on `arsd.characterencodings`, to 31 help it correctly read files from the internet. You should be able to 32 get characterencodings.d from the same place you got this file. 33 34 If you want it to stand alone, just always use the `Document.parseUtf8` 35 function or the constructor that takes a string. 36 37 Symbol_groups: 38 39 core_functionality = 40 41 These members provide core functionality. The members on these classes 42 will provide most of your direct interaction. 43 44 bonus_functionality = 45 46 These provide additional functionality for special use cases. 47 48 implementations = 49 50 These provide implementations of other functionality.
+/
module arsd.dom;

// FIXME: support the css standard namespace thing in the selectors too

version(with_arsd_jsvar)
	import arsd.jsvar;
else {
	// stand-in so `scriptable` can still be named when arsd.jsvar is not compiled in
	enum scriptable = "arsd_jsvar_compatible";
}

// Compile-time filter for opDispatch: answers whether `name` is one of the
// attributes that may be accessed directly, without going through .attr.
bool isConvenientAttribute(string name) {
	switch(name) {
		case "name", "id", "href", "value",
			"checked", "selected", "type",
			"src", "content", "pattern",
			"placeholder", "required", "alt",
			"rel",
			"method", "action", "enctype":
			return true;
		default:
			return false;
	}
}


// FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode
// FIXME: failing to close a paragraph sometimes messes things up too

// FIXME: it would be kinda cool to have some support for internal DTDs
// and maybe XPath as well, to some extent
/*
	we could do
	meh this sux

	auto xpath = XPath(element);

	// get the first p
	xpath.p[0].a["href"]
*/


/// The main document interface, including a html parser.
/// Group: core_functionality
class Document : FileResource, DomParent {
	inout(Document) asDocument() inout { return this; }
	inout(Element) asElement() inout { return null; }

	/// Convenience method for web scraping. Requires [arsd.http2] to be
	/// included in the build as well as [arsd.characterencodings].
static Document fromUrl()(string url, bool strictMode = false) {
	import arsd.http2;

	// fetch the page with a plain GET and block until the response is complete
	auto response = (new HttpClient()).navigateTo(Uri(url), HttpVerb.GET).waitForCompletion();
	auto source = cast(string) response.content;

	auto result = new Document();
	if(!strictMode) {
		// loose mode: the encoding is determined from the data itself
		result.parseGarbage(source);
	} else {
		// strict mode: case-sensitive, strict, using the charset the server declared
		result.parse(source, true, true, response.contentTypeCharset);
	}

	return result;
}

/// Parses `data` (which must already be UTF-8) via [parseUtf8].
this(string data, bool caseSensitive = false, bool strict = false) {
	parseUtf8(data, caseSensitive, strict);
}

/**
	Creates an empty document. It has *nothing* in it at all.
*/
this() {
}

/// This is just something I'm toying with. Right now, you use opIndex to put in css selectors.
/// It returns a struct that forwards calls to all elements it holds, and returns itself so you
/// can chain it.
///
/// Example: document["p"].innerText("hello").addClass("modified");
///
/// Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClass("modified"); }
///
/// Note: always use function calls (not property syntax) and don't use toString in there for best results.
///
/// You can also do things like: document["p"]["b"] though tbh I'm not sure why since the selector string can do all that anyway. Maybe
/// you could put in some kind of custom filter function tho.
ElementCollection opIndex(string selector) {
	return ElementCollection(this.root)[selector];
}

string _contentType = "text/html; charset=utf-8";

/// If you're using this for some other kind of XML, you can
/// set the content type here.
///
/// Note: this has no impact on the function of this class.
/// It is only used if the document is sent via a protocol like HTTP.
///
/// This may be called by parse() if it recognizes the data. Otherwise,
/// if you don't set it, it assumes text/html; charset=utf-8.
@property string contentType(string mimeType) {
	return _contentType = mimeType;
}

/// implementing the FileResource interface, useful for sending via
/// http automatically.
@property string filename() const {
	return null;
}

/// implementing the FileResource interface, useful for sending via
/// http automatically.
override @property string contentType() const {
	return _contentType;
}

/// implementing the FileResource interface; it calls toString.
override immutable(ubyte)[] getData() const {
	auto serialized = this.toString();
	return cast(immutable(ubyte)[]) serialized;
}


// Concatenates any consecutive text nodes (not implemented)
/*
void normalize() {

}
*/

/// This will set delegates for parseSaw* (note: this overwrites anything else you set, and you setting subsequently will overwrite this) that add those things to the dom tree when it sees them.
/// Call this before calling parse().
///
/// Note this will also preserve the prolog and doctype from the original file, if there was one.
void enableAddingSpecialTagsToDom() {
	// every special node kind gets a callback that always asks for the node to be kept
	bool delegate(string) keepIt = (string ignored) => true;

	parseSawComment = keepIt;
	parseSawAspCode = keepIt;
	parseSawPhpCode = keepIt;
	parseSawQuestionInstruction = keepIt;
	parseSawBangInstruction = keepIt;
}

/// If the parser sees a html comment, it will call this callback
/// <!-- comment --> will call parseSawComment(" comment ")
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawComment;

/// If the parser sees <% asp code... %>, it will call this callback.
/// It will be passed "% asp code... %" or "%= asp code .. %"
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawAspCode;

/// If the parser sees <?php php code... ?>, it will call this callback.
/// It will be passed "?php php code... ?" or "?= php code .. ?"
/// Note: dom.d cannot identify the other php <? code ?> short format.
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawPhpCode;

/// if it sees a <?xxx> that is not php or asp
/// it calls this function with the contents.
/// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo")
/// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>.
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawQuestionInstruction;

/// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment),
/// it calls this function with the contents.
/// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo")
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawBangInstruction;

/// Given the kind of garbage you find on the Internet, try to make sense of it.
/// Equivalent to document.parse(data, false, false, null);
/// (Case-insensitive, non-strict, determine character encoding from the data.)
///
/// NOTE: this makes no attempt at added security.
///
/// It is a template so it lazily imports characterencodings.
void parseGarbage()(string data) {
	parse(data, false, false, null);
}

/// Parses well-formed UTF-8, case-sensitive, XML or XHTML
/// Will throw exceptions on things like unclosed tags.
void parseStrict(string data) {
	auto stream = toUtf8Stream(data);
	parseStream(stream, true, true);
}

/// Parses well-formed UTF-8 in loose mode (by default). Tries to correct
/// tag soup, but does NOT try to correct bad character encodings.
///
/// They will still throw an exception.
void parseUtf8(string data, bool caseSensitive = false, bool strict = false) {
	parseStream(toUtf8Stream(data), caseSensitive, strict);
}

// Works out what encoding `rawdata` is in (unless `dataEncoding` is given),
// converts it to UTF-8 and wraps the result in a Utf8Stream.
//
// Params:
//   rawdata      = the undecoded document bytes
//   dataEncoding = the encoding name if known; null means "figure it out"
//   strict       = if true, throws MarkupException when the encoding cannot be
//                  determined and lets conversion errors propagate; if false,
//                  falls back to Windows-1252 instead
//
// this is a template so we get lazy import behavior
Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) {
	import arsd.characterencodings;
	// gotta determine the data encoding. If you know it, pass it in above to skip all this.
	if(dataEncoding is null) {
		dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata);
		// it can't tell... probably a random 8 bit encoding. Let's check the document itself.
		// Now, XML and HTML can both list encoding in the document, but we can't really parse
		// it here without changing a lot of code until we know the encoding. So I'm going to
		// do some hackish string checking.
		if(dataEncoding is null) {
			auto dataAsBytes = cast(immutable(ubyte)[]) rawdata;
			// first, look for an XML prolog
			auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\"");
			if(idx != -1) {
				idx += "encoding=\"".length;
				// we're probably past the prolog if it's this far in; we might be looking at
				// content. Forget about it.
				if(idx > 100)
					idx = -1;
			}
			// if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5)..
			if(idx == -1) {
				idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset=");
				if(idx != -1) {
					idx += "charset=".length;
					// The value may be wrapped in either kind of quote, or in none.
					// The bounds check matters: the data may end right after "charset=".
					if(idx < dataAsBytes.length && (dataAsBytes[idx] == '"' || dataAsBytes[idx] == '\''))
						idx++;
				}
			}

			// found something in either branch...
			if(idx != -1) {
				// read till a quote, the end of the tag, or about 12 chars, whichever comes first...
				auto end = idx;
				while(end < dataAsBytes.length && end - idx < 12) {
					auto b = dataAsBytes[end];
					// none of these can be part of an encoding name
					if(b == '"' || b == '\'' || b == '>')
						break;
					end++;
				}

				dataEncoding = cast(string) dataAsBytes[idx .. end];
			}
			// otherwise, we just don't know.
		}
	}

	if(dataEncoding is null) {
		if(strict)
			throw new MarkupException("I couldn't figure out the encoding of this document.");
		else
		// if we really don't know by here, it means we already tried UTF-8,
		// looked for utf 16 and 32 byte order marks, and looked for xml or meta
		// tags... let's assume it's Windows-1252, since that's probably the most
		// common aside from utf that wouldn't be labeled.

		dataEncoding = "Windows 1252";
	}

	// and now, go ahead and convert it.

	string data;

	if(!strict) {
		// if we're in non-strict mode, we need to check
		// the document for mislabeling too; sometimes
		// web documents will say they are utf-8, but aren't
		// actually properly encoded. If it fails to validate,
		// we'll assume it's actually Windows encoding - the most
		// likely candidate for mislabeled garbage.
		dataEncoding = dataEncoding.toLower();
		dataEncoding = dataEncoding.replace(" ", "");
		dataEncoding = dataEncoding.replace("-", "");
		dataEncoding = dataEncoding.replace("_", "");
		if(dataEncoding == "utf8") {
			try {
				validate(rawdata);
			} catch(UTFException e) {
				dataEncoding = "Windows 1252";
			}
		}
	}

	// only the exact spelling "UTF-8" (the default argument of parse) skips the
	// conversion call; any other name, including the normalized "utf8", goes
	// through convertToUtf8
	if(dataEncoding != "UTF-8") {
		if(strict)
			data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
		else {
			try {
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
			} catch(Exception e) {
				// unknown or broken encoding label: fall back to the most likely candidate
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252");
			}
		}
	} else
		data = rawdata;

	return toUtf8Stream(data);
}

// Wraps an already-UTF-8 string in a Utf8Stream (which may itself just be `string`).
private
Utf8Stream toUtf8Stream(in string rawdata) {
	string data = rawdata;
	static if(is(Utf8Stream == string))
		return data;
	else
		return new Utf8Stream(data);
}

/++
	List of elements that can be assumed to be self-closed
	in this document. The default for a Document is a hard-coded
	list of ones appropriate for HTML. For [XmlDocument], it defaults
	to empty. You can modify this after construction but before parsing.

	History:
		Added February 8, 2021 (included in dub release 9.2)
+/
string[] selfClosedElements = htmlSelfClosedElements;

/++
	List of elements that are considered inline for pretty printing.
	The default for a Document is hard-coded to something appropriate
	for HTML. For [XmlDocument], it defaults to empty. You can modify
	this after construction but before parsing.

	History:
		Added June 21, 2021 (included in dub release 10.1)
+/
string[] inlineElements = htmlInlineElements;

/**
	Take XMLish data and try to make the DOM tree out of it.

	The goal isn't to be perfect, but to just be good enough to
	approximate Javascript's behavior.

	If strict, it throws on something that doesn't make sense.
	(Examples: mismatched tags. It doesn't validate!)
	If not strict, it tries to recover anyway, and only throws
	when something is REALLY unworkable.

	If strict is false, it uses a magic list of tags that needn't
	be closed. If you are writing a document specifically for this,
	try to avoid such - use self closed tags at least. Easier to parse.

	The dataEncoding argument can be used to pass a specific
	charset encoding for automatic conversion. If null (which is NOT
	the default!), it tries to determine from the data itself,
	using the xml prolog or meta tags. If it is still unsure, it throws
	in strict mode and assumes Windows-1252 otherwise.

	If this assumption is wrong, it can throw on non-ascii
	characters!


	Note that it previously assumed the data was encoded as UTF-8, which
	is why the dataEncoding argument defaults to that.

	So it shouldn't break backward compatibility.

	But, if you want the best behavior on wild data - figuring it out from the document
	instead of assuming - you'll probably want to change that argument to null.

	This is a template so it lazily imports arsd.characterencodings, which is required
	to fix up data encodings.

	If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the
	dependency. If it is data from the Internet though, a random website, the encoding
	is often a lie. This function, if dataEncoding == null, can correct for that, or
	you can try parseGarbage. In those cases, arsd.characterencodings is required to
	compile.
*/
void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") {
	auto data = handleDataEncoding(rawdata, dataEncoding, strict);
	parseStream(data, caseSensitive, strict);
}

// note: this works best in strict mode, unless data is just a simple string wrapper
void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false) {
	// FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler
	// of my big app.

	assert(data !is null);

	// go through character by character.
	// if you see a <, consider it a tag.
	// name goes until the first non tagname character
	// then see if it self closes or has an attribute

	// if not in a tag, anything not a tag is a big text
	// node child. It ends as soon as it sees a <

	// Whitespace in text or attributes is preserved, but not between attributes

	// & and friends are converted when I know them, left the same otherwise


	// this it should already be done correctly..
// so I'm leaving it off to net a ~10% speed boost on my typical test file (really)
//validate(data); // it *must* be UTF-8 for this to work correctly

// current read offset into data; shared by all the helper closures below
sizediff_t pos = 0;

clear();

loose = !caseSensitive;

bool sawImproperNesting = false;
bool paragraphHackfixRequired = false;

// 1-based line number of offset p, found by counting newlines before it
int getLineNumber(sizediff_t p) {
	int lineCount = 1;
	foreach(ch; data[0..p]) {
		if(ch == '\n')
			lineCount += 1;
	}
	return lineCount;
}

// always throws a MarkupException that reports the current position and line
void parseError(string message) {
	auto lineNumber = getLineNumber(pos);
	throw new MarkupException(format("char %d (line %d): %s", pos, lineNumber, message));
}

// advances pos past any simple whitespace; reports whether it moved at all
bool eatWhitespace() {
	auto before = pos;
	while(pos < data.length && data[pos].isSimpleWhite)
		pos++;
	return pos != before;
}

// Reads a tag name starting at pos: everything up to >, / or whitespace.
// (: is included, for namespaces.) Lowercased unless caseSensitive.
string readTagName() {
	auto nameBegins = pos;
	for(;;) {
		auto ch = data[pos];
		if(ch == '>' || ch == '/' || ch.isSimpleWhite)
			break;

		pos++;
		if(pos == data.length) {
			if(strict)
				throw new Exception("tag name incomplete when file ended");
			break;
		}
	}

	auto name = data[nameBegins..pos];
	if(caseSensitive)
		return name;
	return toLower(name);
}

// Reads an attribute name starting at pos: everything up to >, /, = or whitespace.
// (: is included, for namespaces.) Lowercased unless caseSensitive.
string readAttributeName() {
	auto nameBegins = pos;
	for(;;) {
		auto ch = data[pos];
		if(ch == '>' || ch == '/' || ch == '=' || ch.isSimpleWhite)
			break;

		if(ch == '<') {
			if(strict)
				throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos)));
			// e.g. <a href="something" <img src="poo" /></a>. The > should have been after the href,
			// but some files don't do that right and the browser handles it, so we will too,
			// by pretending the > was indeed there
			break;
		}

		pos++;
		if(pos == data.length) {
			if(strict)
				throw new Exception("unterminated attribute name");
			break;
		}
	}

	auto name = data[nameBegins..pos];
	if(caseSensitive)
		return name;
	return toLower(name);
}

// Reads an attribute value starting at pos and returns it with entities decoded.
// Quoted values run to the matching quote; unquoted ones (an error in strict
// mode) run to whitespace, > or />.
string readAttributeValue() {
	if(pos >= data.length) {
		if(strict)
			throw new Exception("no attribute value before end of file");
		return null;
	}

	char opener = data[pos];
	if(opener == '\'' || opener == '"') {
		auto openedAt = pos;
		pos++;
		auto valueBegins = pos;
		while(pos < data.length && data[pos] != opener)
			pos++;
		if(strict && pos == data.length)
			throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(openedAt));
		string decoded = htmlEntitiesDecode(data[valueBegins..pos], strict);
		pos++; // skip over the closing quote
		return decoded;
	}

	if(strict)
		parseError("Attributes must be quoted");

	// read until whitespace or terminator (/> or >)
	auto valueBegins = pos;
	for(; pos < data.length; pos++) {
		if(data[pos] == '>')
			break;
		// unquoted attributes might be urls, so gotta be careful with them and self-closed elements
		if(data[pos] == '/' && pos + 1 < data.length && data[pos+1] == '>')
			break;
		if(data[pos].isSimpleWhite)
			break;
	}

	// the terminator is deliberately not consumed - the caller needs it
	return htmlEntitiesDecode(data[valueBegins..pos], strict);
}

// consumes everything up to (not including) the next < and makes a text node of it
TextNode readTextNode() {
	auto textBegins = pos;
	for(; pos < data.length; pos++) {
		if(data[pos] == '<')
			break;
	}

	return TextNode.fromUndecodedString(this, data[textBegins..pos]);
}

// this is obsolete!
RawSource readCDataNode() {
	auto start = pos;
	while(pos < data.length && data[pos] != '<') {
		pos++;
	}

	return new RawSource(this, data[start..pos]);
}


// What readElement hands back to its caller.
struct Ele {
	int type; // element or closing tag or nothing
		/*
			type == 0 means regular node, self-closed (element is valid)
			type == 1 means closing tag (payload is the tag name, element may be valid)
			type == 2 means you should ignore it completely
			type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not
			type == 4 means the document was totally empty
		*/
	Element element; // for type == 0 or type == 3
	string payload; // for type == 1
}
// Recursively reads one node starting at pos: a text node, a comment / CDATA /
// bang instruction, an asp/php/question instruction, a closing tag, or an
// element together with all of its children.
//
// parentChain holds the names of the currently open ancestors and is only
// maintained in loose mode, where it is used to recover from bad nesting.
Ele readElement(string[] parentChain = null) {
	// FIXME: this is the slowest function in this module, by far, even in strict mode.
	// Loose mode should perform decently, but strict mode is the important one.
	if(!strict && parentChain is null)
		parentChain = [];

	// NOTE(review): being static, this list is shared by every recursive call and also
	// survives from one parse to the next - confirm the latter is intended.
	static string[] recentAutoClosedTags;

	if(pos >= data.length)
	{
		if(strict) {
			throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain));
		} else {
			if(parentChain.length)
				return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended
			else
				return Ele(4); // signal emptiness upstream
		}
	}

	if(data[pos] != '<') {
		return Ele(0, readTextNode(), null);
	}

	enforce(data[pos] == '<');
	pos++;
	if(pos == data.length) {
		if(strict)
			throw new MarkupException("Found trailing < at end of file");
		// if not strict, we'll just skip the switch
	} else
	switch(data[pos]) {
		// I don't care about these, so I just want to skip them
		case '!': // might be a comment, a doctype, or a special instruction
			pos++;

				// FIXME: we should store these in the tree too
				// though I like having it stripped out tbh.

			if(pos == data.length) {
				if(strict)
					throw new MarkupException("<! opened at end of file");
			} else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') {
				// comment
				pos += 2;

				// FIXME: technically, a comment is anything
				// between -- and -- inside a <!> block.
				// so in <!-- test -- lol> , the " lol" is NOT a comment
				// and should probably be handled differently in here, but for now
				// I'll just keep running until --> since that's the common way

				auto commentStart = pos;
				// <= rather than <, so a terminator sitting at the very end of the
				// input is still examined
				while(pos+3 <= data.length && data[pos..pos+3] != "-->")
					pos++;

				auto end = commentStart;

				if(pos + 3 > data.length) {
					// ran out of input without seeing -->
					if(strict)
						throw new MarkupException("unclosed comment");
					end = data.length;
					pos = data.length;
				} else {
					end = pos;
					assert(data[pos] == '-');
					pos++;
					assert(data[pos] == '-');
					pos++;
					assert(data[pos] == '>');
					pos++;
				}

				if(parseSawComment !is null)
					if(parseSawComment(data[commentStart .. end])) {
						return Ele(3, new HtmlComment(this, data[commentStart .. end]), null);
					}
			} else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") {
				pos += 7;

				auto cdataStart = pos;

				ptrdiff_t end = -1;
				typeof(end) cdataEnd;

				if(pos < data.length) {
					// cdata isn't allowed to nest, so this should be generally ok, as long as it is found
					end = data[pos .. $].indexOf("]]>");
				}

				if(end == -1) {
					if(strict)
						throw new MarkupException("Unclosed CDATA section");
					end = pos;
					cdataEnd = pos;
				} else {
					cdataEnd = pos + end;
					pos = cdataEnd + 3;
				}

				return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null);
			} else {
				auto start = pos;
				while(pos < data.length && data[pos] != '>')
					pos++;

				auto bangEnds = pos;
				if(pos == data.length) {
					if(strict)
						throw new MarkupException("unclosed processing instruction (<!xxx>)");
				} else pos++; // skipping the >

				if(parseSawBangInstruction !is null)
					if(parseSawBangInstruction(data[start .. bangEnds])) {
						// FIXME: these should be able to modify the parser state,
						// doing things like adding entities, somehow.

						return Ele(3, new BangInstruction(this, data[start .. bangEnds]), null);
					}
			}

			/*
			if(pos < data.length && data[pos] == '>')
				pos++; // skip the >
			else
				assert(!strict);
			*/
		break;
		case '%':
		case '?':
			/*
				Here's what we want to support:

				<% asp code %>
				<%= asp code %>
				<?php php code ?>
				<?= php code ?>

				The contents don't really matter, just if it opens with
				one of the above for, it ends on the two char terminator.

				<?something>
					this is NOT php code
					because I've seen this in the wild: <?EM-dummyText>

					This could be php with shorttags which would be cut off
					prematurely because if(a >) - that > counts as the close
					of the tag, but since dom.d can't tell the difference
					between that and the <?EM> real world example, it will
					not try to look for the ?> ending.

				The difference between this and the asp/php stuff is that it
				ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end
				on >.
			*/

			char end = data[pos];
			auto started = pos;
			bool isAsp = end == '%';
			int currentIndex = 0;
			bool isPhp = false;
			bool isEqualTag = false;
			int phpCount = 0;

		    more:
			pos++; // skip the start
			if(pos == data.length) {
				if(strict)
					throw new MarkupException("Unclosed <"~end~" by end of file");
			} else {
				currentIndex++;
				if(currentIndex == 1 && data[pos] == '=') {
					if(!isAsp)
						isPhp = true;
					isEqualTag = true;
					goto more;
				}
				if(currentIndex == 1 && data[pos] == 'p')
					phpCount++;
				if(currentIndex == 2 && data[pos] == 'h')
					phpCount++;
				if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2)
					isPhp = true;

				if(data[pos] == '>') {
					// asp and php only end on the two character terminator (%> or ?>)
					if((isAsp || isPhp) && data[pos - 1] != end)
						goto more;
					// otherwise we're done
				} else
					goto more;
			}

			//writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]);
			auto code = data[started .. pos];


			assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length));
			if(pos < data.length)
				pos++; // get past the >

			if(isAsp && parseSawAspCode !is null) {
				if(parseSawAspCode(code)) {
					return Ele(3, new AspCode(this, code), null);
				}
			} else if(isPhp && parseSawPhpCode !is null) {
				if(parseSawPhpCode(code)) {
					return Ele(3, new PhpCode(this, code), null);
				}
			} else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) {
				if(parseSawQuestionInstruction(code)) {
					return Ele(3, new QuestionInstruction(this, code), null);
				}
			}
		break;
		case '/': // closing an element
			pos++; // skip the start
			auto p = pos;
			while(pos < data.length && data[pos] != '>')
				pos++;
			//writefln("</%s>", data[p..pos]);
			if(pos == data.length && data[pos-1] != '>') {
				if(strict)
					throw new MarkupException("File ended before closing tag had a required >");
				else
					data ~= ">"; // just hack it in
			}
			pos++; // skip the '>'

			// XML permits whitespace between the name and the > of an end tag
			// (</p >), so it must not end up as part of the name
			auto nameEnd = pos - 1;
			while(nameEnd > p && data[nameEnd - 1].isSimpleWhite)
				nameEnd--;

			string tname = data[p..nameEnd];
			if(!caseSensitive)
				tname = tname.toLower();

		return Ele(1, null, tname); // closing tag reports itself here
		case ' ': // assume it isn't a real element...
			if(strict) {
				parseError("bad markup - improperly placed <");
				assert(0); // parseError always throws
			} else
				return Ele(0, TextNode.fromUndecodedString(this, "<"), null);
		default:

			if(!strict) {
				// what about something that kinda looks like a tag, but isn't?
				auto nextTag = data[pos .. $].indexOf("<");
				auto closeTag = data[pos .. $].indexOf(">");
				if(closeTag != -1 && nextTag != -1)
					if(nextTag < closeTag) {
						// since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically

						auto equal = data[pos .. $].indexOf("=\"");
						if(equal != -1 && equal < closeTag) {
							// this MIGHT be ok, soldier on
						} else {
							// definitely no good, this must be a (horribly distorted) text node
							pos++; // skip the < we're on - don't want text node to end prematurely
							auto node = readTextNode();
							node.contents = "<" ~ node.contents; // put this back
							return Ele(0, node, null);
						}
					}
			}

			string tagName = readTagName();
			string[string] attributes;

			// Finishes the opening tag at pos (pos is on the / of a self-closed tag or
			// on the >), creates the element, then reads children until the matching
			// closing tag shows up.
			Ele addTag(bool selfClosed) {
				if(selfClosed)
					pos++;
				else {
					if(!strict)
						if(tagName.isInArray(selfClosedElements))
							// these are de-facto self closed
							selfClosed = true;
				}

				import std.algorithm.comparison;

				if(strict) {
					// e.g. input ending in "<a/": report it as a markup error instead of
					// indexing past the end of the data
					if(pos >= data.length)
						throw new MarkupException("tag open, didn't find > before end of file");
					enforce(data[pos] == '>', format("got %s when expecting > (possible missing attribute name)\nContext:\n%s", data[pos], data[max(0, pos - 100) .. min(data.length, pos + 100)]));
				} else {
					// if we got here, it's probably because a slash was in an
					// unquoted attribute - don't trust the selfClosed value
					if(!selfClosed)
						selfClosed = tagName.isInArray(selfClosedElements);

					while(pos < data.length && data[pos] != '>')
						pos++;

					if(pos >= data.length) {
						// the tag never closed
						assert(data.length != 0);
						pos = data.length - 1; // rewinding so it hits the end at the bottom..
					}
				}

				auto whereThisTagStarted = pos; // for better error messages

				pos++;

				auto e = createElement(tagName);
				e.attributes = attributes;
				version(dom_node_indexes) {
					if(e.dataset.nodeIndex.length == 0)
						e.dataset.nodeIndex = to!string(&(e.attributes));
				}
				e.selfClosed = selfClosed;
				e.parseAttributes();


				// HACK to handle script and style as a raw data section as it is in HTML browsers
				if(tagName == "script" || tagName == "style") {
					if(!selfClosed) {
						string closer = "</" ~ tagName ~ ">";
						// look for the closer once, case-insensitively in loose mode
						ptrdiff_t ending = -1;
						if(pos < data.length)
							ending = indexOf(data[pos..$], closer, 0, (loose ? CaseSensitive.no : CaseSensitive.yes));
						/*
						if(loose && ending == -1 && pos < data.length)
							ending = indexOf(data[pos..$], closer.toUpper());
						*/
						if(ending == -1) {
							if(strict)
								throw new Exception("tag " ~ tagName ~ " never closed");
							else {
								// let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit.
								if(pos < data.length) {
									e = new TextNode(this, data[pos .. $]);
									pos = data.length;
								}
							}
						} else {
							ending += pos;
							e.innerRawSource = data[pos..ending];
							pos = ending + closer.length;
						}
					}
					return Ele(0, e, null);
				}

				bool closed = selfClosed;

				void considerHtmlParagraphHack(Element n) {
					assert(!strict);
					if(e.tagName == "p" && e.tagName == n.tagName) {
						// html lets you write <p> para 1 <p> para 1
						// but in the dom tree, they should be siblings, not children.
						paragraphHackfixRequired = true;
					}
				}

				//writef("<%s>", tagName);
				while(!closed) {
					Ele n;
					if(strict)
						n = readElement();
					else
						n = readElement(parentChain ~ tagName);

					if(n.type == 4) return n; // the document is empty

					if(n.type == 3 && n.element !is null) {
						// special node, append if possible
						if(e !is null)
							e.appendChild(n.element);
						else
							piecesBeforeRoot ~= n.element;
					} else if(n.type == 0) {
						if(!strict)
							considerHtmlParagraphHack(n.element);
						e.appendChild(n.element);
					} else if(n.type == 1) {
						bool found = false;
						if(n.payload != tagName) {
							if(strict)
								parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted)));
							else {
								sawImproperNesting = true;
								// this is so we don't drop several levels of awful markup
								if(n.element) {
									if(!strict)
										considerHtmlParagraphHack(n.element);
									e.appendChild(n.element);
									n.element = null;
								}

								// is the element open somewhere up the chain?
								foreach(i, parent; parentChain)
									if(parent == n.payload) {
										recentAutoClosedTags ~= tagName;
										// just rotating it so we don't inadvertently break stuff with vile crap
										if(recentAutoClosedTags.length > 4)
											recentAutoClosedTags = recentAutoClosedTags[1 .. $];

										n.element = e;
										return n;
									}

								// if not, this is a text node; we can't fix it up...

								// If it's already in the tree somewhere, assume it is closed by algorithm
								// and we shouldn't output it - odds are the user just flipped a couple tags
								foreach(ele; e.tree) {
									if(ele.tagName == n.payload) {
										found = true;
										break;
									}
								}

								foreach(ele; recentAutoClosedTags) {
									if(ele == n.payload) {
										found = true;
										break;
									}
								}

								if(!found) // if not found in the tree though, it's probably just text
								e.appendChild(TextNode.fromUndecodedString(this, "</"~n.payload~">"));
							}
						} else {
							if(n.element) {
								if(!strict)
									considerHtmlParagraphHack(n.element);
								e.appendChild(n.element);
							}
						}

						if(n.payload == tagName) // in strict mode, this is always true
							closed = true;
					} else { /*throw new Exception("wtf " ~ tagName);*/ }
				}
				//writef("</%s>\n", tagName);
				return Ele(0, e, null);
			}

			// if a tag was opened but not closed by end of file, we can arrive here
			if(!strict && pos >= data.length)
				return addTag(false);
			//else if(strict) assert(0); // should be caught before

			switch(data[pos]) {
				default: assert(0);
				case '/': // self closing tag
					return addTag(true);
				case '>':
					return addTag(false);
				case ' ':
				case '\t':
				case '\n':
				case '\r':
					// there might be attributes...
					moreAttributes:
					eatWhitespace();

					// same deal as above the switch....
					if(!strict && pos >= data.length)
						return addTag(false);

					if(strict && pos >= data.length)
						throw new MarkupException("tag open, didn't find > before end of file");

					switch(data[pos]) {
						case '/': // self closing tag
							return addTag(true);
						case '>': // closed tag; open -- we now read the contents
							return addTag(false);
						default: // it is an attribute
							string attrName = readAttributeName();
							// an attribute with no = takes its own name as its value
							string attrValue = attrName;

							bool ateAny = eatWhitespace();
							if(strict && ateAny)
								throw new MarkupException("inappropriate whitespace after attribute name");

							if(pos >= data.length) {
								if(strict)
									assert(0, "this should have thrown in readAttributeName");
								else {
									data ~= ">";
									goto blankValue;
								}
							}
							if(data[pos] == '=') {
								pos++;

								ateAny = eatWhitespace();
								// the spec actually allows this!
								//if(strict && ateAny)
									//throw new MarkupException("inappropriate whitespace after attribute equals");

								attrValue = readAttributeValue();

								eatWhitespace();
							}

							blankValue:

							if(strict && attrName in attributes)
								throw new MarkupException("Repeated attribute: " ~ attrName);

							if(attrName.strip().length)
								attributes[attrName] = attrValue;
							else if(strict) throw new MarkupException("wtf, zero length attribute name");

							if(!strict && pos < data.length && data[pos] == '<') {
								// this is the broken tag that doesn't have a > at the end
								data = data[0 .. pos] ~ ">" ~ data[pos.. $];
								// let's insert one as a hack
								goto case '>';
							}

							goto moreAttributes;
					}
			}
	}

	return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly.
	//assert(0);
}

eatWhitespace();
Ele r;
do {
	r = readElement(); // there SHOULD only be one element...
1127 1128 if(r.type == 3 && r.element !is null) 1129 piecesBeforeRoot ~= r.element; 1130 1131 if(r.type == 4) 1132 break; // the document is completely empty... 1133 } while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node 1134 1135 root = r.element; 1136 root.parent_ = this; 1137 1138 if(!strict) // in strict mode, we'll just ignore stuff after the xml 1139 while(r.type != 4) { 1140 r = readElement(); 1141 if(r.type != 4 && r.type != 2) { // if not empty and not ignored 1142 if(r.element !is null) 1143 piecesAfterRoot ~= r.element; 1144 } 1145 } 1146 1147 if(root is null) 1148 { 1149 if(strict) 1150 assert(0, "empty document should be impossible in strict mode"); 1151 else 1152 parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do 1153 } 1154 1155 if(paragraphHackfixRequired) { 1156 assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag... 1157 1158 // in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml). 1159 // It's hard to handle above though because my code sucks. So, we'll fix it here. 1160 1161 // Where to insert based on the parent (for mixed closed/unclosed <p> tags). See #120 1162 // Kind of inefficient because we can't detect when we recurse back out of a node. 1163 Element[Element] insertLocations; 1164 auto iterator = root.tree; 1165 foreach(ele; iterator) { 1166 if(ele.parentNode is null) 1167 continue; 1168 1169 if(ele.tagName == "p" && ele.parentNode.tagName == ele.tagName) { 1170 auto shouldBePreviousSibling = ele.parentNode; 1171 auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder... 
1172 if (auto p = holder in insertLocations) { 1173 shouldBePreviousSibling = *p; 1174 assert(shouldBePreviousSibling.parentNode is holder); 1175 } 1176 ele = holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree()); 1177 insertLocations[holder] = ele; 1178 iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up. 1179 } 1180 } 1181 } 1182 } 1183 1184 /* end massive parse function */ 1185 1186 /// Gets the <title> element's innerText, if one exists 1187 @property string title() { 1188 bool doesItMatch(Element e) { 1189 return (e.tagName == "title"); 1190 } 1191 1192 auto e = findFirst(&doesItMatch); 1193 if(e) 1194 return e.innerText(); 1195 return ""; 1196 } 1197 1198 /// Sets the title of the page, creating a <title> element if needed. 1199 @property void title(string t) { 1200 bool doesItMatch(Element e) { 1201 return (e.tagName == "title"); 1202 } 1203 1204 auto e = findFirst(&doesItMatch); 1205 1206 if(!e) { 1207 e = createElement("title"); 1208 auto heads = getElementsByTagName("head"); 1209 if(heads.length) 1210 heads[0].appendChild(e); 1211 } 1212 1213 if(e) 1214 e.innerText = t; 1215 } 1216 1217 // FIXME: would it work to alias root this; ???? might be a good idea 1218 /// These functions all forward to the root element. See the documentation in the Element class. 
Element getElementById(string id) {
	// plain delegation: the root element performs the actual search
	return root.getElementById(id);
}

/// Forwards to the root element's `requireElementById`, passing `file` and `line`
/// (which default to the call site) through unchanged.
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
out(ret) {
	assert(ret !is null);
}
do {
	return root.requireElementById!(SomeElementType)(id, file, line);
}

/// Runs [querySelector] and casts the result to `SomeElementType`.
/// Throws: ElementNotFoundException if nothing matched or the dynamic cast failed.
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
out(ret) {
	assert(ret !is null);
}
do {
	if(auto found = cast(SomeElementType) querySelector(selector))
		return found;
	throw new ElementNotFoundException(SomeElementType.stringof, selector, this.root, file, line);
}

/// Runs [querySelector], casts the result to `SomeElementType`, and wraps it
/// (null or not) in a `MaybeNullElement`.
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
{
	return MaybeNullElement!SomeElementType(cast(SomeElementType) querySelector(selector));
}

/// Returns the first element matching the selector, or null if none does.
@scriptable
Element querySelector(string selector) {
	// see comment below on Document.querySelectorAll
	auto parsed = Selector(selector);//, !loose);
	foreach(ref component; parsed.components) {
		if(component.parts.length == 0)
			continue;
		if(component.parts[0].separation == 0)
			component.parts[0].separation = -1;
	}

	// only the first lazily produced match is wanted
	foreach(match; parsed.getMatchingElementsLazy(this.root))
		return match;

	return null;
}

/// Returns every element matching the selector.
@scriptable
Element[] querySelectorAll(string selector) {
	// In standards-compliant code, the document is slightly magical
	// in that it is a pseudoelement at top level. It should actually
	// match the root as one of its children.
	//
	// In versions of dom.d before Dec 29 2019, this worked because
	// querySelectorAll was willing to return itself. With that bug fix
	// (search "arbitrary id asduiwh" in this file for associated unittest)
	// this would have failed. Hence adding back the root if it matches the
	// selector itself.
	//
	// I'd love to do this better later.

	auto parsed = Selector(selector);//, !loose);
	foreach(ref component; parsed.components) {
		if(component.parts.length == 0)
			continue;
		if(component.parts[0].separation == 0)
			component.parts[0].separation = -1;
	}
	return parsed.getMatchingElements(this.root);
}

/// Forwards to the root element's `getElementsBySelector`.
deprecated("use querySelectorAll instead")
Element[] getElementsBySelector(string selector) {
	return root.getElementsBySelector(selector);
}

/// Forwards to the root element's `getElementsByTagName`.
@scriptable
Element[] getElementsByTagName(string tag) {
	return root.getElementsByTagName(tag);
}

/// Forwards to the root element's `getElementsByClassName`.
@scriptable
Element[] getElementsByClassName(string tag) {
	return root.getElementsByClassName(tag);
}

/** FIXME: btw, this could just be a lazy range...... */
/// Returns the first element (as visited by [findFirst]) whose tag name equals `tag`.
/// In loose mode the requested name is lower-cased before comparing.
Element getFirstElementByTagName(string tag) {
	string wanted = loose ? tag.toLower() : tag;
	return findFirst((Element candidate) => candidate.tagName == wanted);
}

/// This returns the <body> element, if there is one. (It is named differently than in Javascript, where it is called 'body', because body is a keyword in D.)
Element mainBody() {
	return getFirstElementByTagName("body");
}

/// this uses a weird thing... it's [name=] if no colon and
/// [property=] if colon
string getMeta(string name) {
	string attributeName = (name.indexOf(":") == -1) ? "name" : "property";
	if(auto metaTag = querySelector("head meta[" ~ attributeName ~ "=" ~ name ~ "]"))
		return metaTag.content;
	return null;
}

/// Sets a meta tag in the document header.
/// It is kinda hacky to work easily for both Facebook open graph and traditional html meta tags.
void setMeta(string name, string value) {
	// names containing a colon (e.g. open graph) are matched on [property=], others on [name=]
	string attributeName = (name.indexOf(":") == -1) ? "name" : "property";

	auto metaTag = querySelector("head meta[" ~ attributeName ~ "=" ~ name ~ "]");
	if(metaTag is null) {
		// no such tag yet: create it under <head> (requireSelector throws if there is no head)
		metaTag = requireSelector("head").addChild("meta");
		metaTag.setAttribute(attributeName, name);
	}

	metaTag.content = value;
}

/// Returns the result of `getElementsByTagName("form")`, cast to `Form[]`.
Form[] forms() {
	auto found = getElementsByTagName("form");
	return cast(Form[]) found;
}

/// Creates a new `form` element via [createElement] and casts it to `Form`.
Form createForm()
out(ret) {
	assert(ret !is null);
}
do {
	auto created = createElement("form");
	return cast(Form) created;
}

/// Creates an element through `Element.make`, passing `selfClosedElements` as the self-closed list.
/// In loose mode the tag name is lower-cased first.
Element createElement(string name) {
	string effectiveName = loose ? name.toLower() : name;
	return Element.make(effectiveName, null, null, selfClosedElements);

	// return new Element(this, name, null, selfClosed);
}

/// Creates a new `DocumentFragment` constructed with this document.
Element createFragment() {
	return new DocumentFragment(this);
}

/// Creates a new `TextNode` constructed with this document and the given content.
Element createTextNode(string content) {
	return new TextNode(this, content);
}


/// Depth-first, parent-before-children search starting at the root.
/// Returns the first element for which `doesItMatch` returns true, or null
/// if there is no root or nothing matches.
Element findFirst(bool delegate(Element) doesItMatch) {
	if(root is null)
		return null;

	Element search(Element current) {
		if(doesItMatch(current))
			return current;

		foreach(child; current.children) {
			if(auto hit = search(child))
				return hit;
		}

		return null;
	}

	return search(root);
}

/// Drops the root element and turns loose mode off.
void clear() {
	loose = false;
	root = null;
}

/// Replaces the prolog string and records that it was set explicitly.
void setProlog(string d) {
	prologWasSet = true;
	_prolog = d;
}

///.
private string _prolog = "<!DOCTYPE html>\n";
private bool prologWasSet = false; // set to true if the user changed it

/// The text emitted before the root element by [toString].
@property string prolog() const {
	// if the user explicitly changed it, do what they want
	// or if we didn't keep/find stuff from the document itself,
	// we'll use the builtin one as a default.
	if(prologWasSet || piecesBeforeRoot.length == 0)
		return _prolog;

	// otherwise rebuild it from the kept pieces, one per line
	string p;
	foreach(e; piecesBeforeRoot)
		p ~= e.toString() ~ "\n";
	return p;
}

/// Returns the prolog followed by the root element's `toString`.
/// A document without a root (for example after [clear]) yields just the prolog
/// rather than dereferencing null.
override string toString() const {
	if(root is null)
		return prolog;
	return prolog ~ root.toString();
}

/++
	Writes it out with whitespace for easier eyeball debugging

	Do NOT use for anything other than eyeball debugging,
	because whitespace may be significant content in XML.

	A document without a root contributes nothing between the prolog and
	the pieces after the root.
+/
string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
	import std.string;
	string s = prolog.strip;

	/*
	if(insertComments) s ~= "<!--";
	s ~= "\n";
	if(insertComments) s ~= "-->";
	*/

	// root is null after clear(); skip it instead of crashing
	if(root !is null)
		s ~= root.toPrettyString(insertComments, indentationLevel, indentWith);
	foreach(a; piecesAfterRoot)
		s ~= a.toPrettyString(insertComments, indentationLevel, indentWith);
	return s;
}

///.
Element root;

/// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s
Element[] piecesBeforeRoot;

/// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it
Element[] piecesAfterRoot;

///.
bool loose;



// what follows are for mutation events that you can observe
void delegate(DomMutationEvent)[] eventObservers;

/// Calls every delegate in `eventObservers`, in order, with the given event.
void dispatchMutationEvent(DomMutationEvent e) {
	foreach(observer; eventObservers) {
		observer(e);
	}
}
} // end of class Document

/// Anything that can be the parent of an Element. Exactly one of the two
/// accessors is expected to be non-null for a given implementation
/// (Document returns itself from asDocument, Element from asElement).
interface DomParent {
	inout(Document) asDocument() inout;
	inout(Element) asElement() inout;
}

/// This represents almost everything in the DOM.
/// Group: core_functionality
class Element : DomParent {
	inout(Document) asDocument() inout { return null; }
	inout(Element) asElement() inout { return this; }

	/// Returns a collection of elements by selector.
	/// See: [Document.opIndex]
	ElementCollection opIndex(string selector) {
		auto collection = ElementCollection(this);
		return collection[selector];
	}

	/++
		Returns the child node with the particular index, or null
		if the index is past the end of the child list.

		Be aware that child nodes include text nodes, including
		whitespace-only nodes.
	+/
	Element opIndex(size_t index) {
		return (index < children.length) ? this.children[index] : null;
	}

	/// Calls getElementById, but throws instead of returning null if the element is not found. You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done.
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
out(ret) {
	assert(ret !is null);
}
do {
	// a failed dynamic cast yields null, which is reported the same way as "not found"
	if(auto found = cast(SomeElementType) getElementById(id))
		return found;
	throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, this, file, line);
}

/// Same as [requireElementById], but looks the element up with [querySelector]
/// using a selector instead of an id.
/// Throws: ElementNotFoundException if nothing matched or the dynamic cast failed.
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
out(ret) {
	assert(ret !is null);
}
do {
	if(auto found = cast(SomeElementType) querySelector(selector))
		return found;
	throw new ElementNotFoundException(SomeElementType.stringof, selector, this, file, line);
}


/++
	If a matching selector is found, it returns that Element. Otherwise, the returned object returns null for all methods.
+/
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
{
	return MaybeNullElement!SomeElementType(cast(SomeElementType) querySelector(selector));
}



/// get all the classes on this element (the class name split on single spaces)
@property string[] classes() {
	return className.split(" ");
}

/// Adds a string to the class attribute. The class attribute is used a lot in CSS.
@scriptable
Element addClass(string c) {
	// don't add it twice
	if(!hasClass(c)) {
		string existing = getAttribute("class");
		if(existing.length == 0)
			setAttribute("class", c);
		else
			setAttribute("class", existing ~ " " ~ c);
	}

	return this;
}

/// Removes a particular class name. Returns this element so calls can be chained.
@scriptable
Element removeClass(string c) {
	if(!hasClass(c))
		return this;

	// rebuild the list, leaving out every entry equal to c
	string rebuilt;
	foreach(entry; classes) {
		if(entry == c)
			continue; // cut it out
		if(rebuilt.length)
			rebuilt ~= " ";
		rebuilt ~= entry;
	}

	className = rebuilt.strip();

	return this;
}

/// Returns whether the given class appears in this element.
bool hasClass(string c) {
	string all = className;

	// substring pre-check: if c does not occur at all, it cannot be one of the entries
	if(all.indexOf(c) == -1)
		return false;

	// c occurs somewhere; make sure it is a whole entry and not part of a longer name
	foreach(entry; all.split(" ")) {
		if(entry == c)
			return true;
	}
	return false;

	/*
	int rightSide = idx + c.length;

	bool checkRight() {
		if(rightSide == cn.length)
			return true; // it's the only class
		else if(iswhite(cn[rightSide]))
			return true;
		return false; // this is a substring of something else..
	}

	if(idx == 0) {
		return checkRight();
	} else {
		if(!iswhite(cn[idx - 1]))
			return false; // substring
		return checkRight();
	}

	assert(0);
	*/
}


/* *******************************
	  DOM Mutation
*********************************/
/// convenience function to quickly add a tag with some text or
/// other relevant info (for example, it's a src for an <img> element
/// instead of inner text). The new element is built with [Element.make]
/// and the result of `appendChild` is returned.
Element addChild(string tagName, string childInfo = null, string childInfo2 = null)
in {
	assert(tagName !is null);
}
out(e) {
	//assert(e.parentNode is this);
	//assert(e.parentDocument is this.parentDocument);
}
do {
	auto created = Element.make(tagName, childInfo, childInfo2);
	// FIXME (maybe): if the thing is self closed, we might want to go ahead and
	// return the parent. That will break existing code though.
	return appendChild(created);
}

/// Another convenience function. Adds a child directly after the current one, returning
/// the new child.
///
/// Between this, addChild, and parentNode, you can build a tree as a single expression.
Element addSibling(string tagName, string childInfo = null, string childInfo2 = null)
in {
	assert(tagName !is null);
	assert(parentNode !is null);
}
out(e) {
	assert(e.parentNode is this.parentNode);
	assert(e.parentDocument is this.parentDocument);
}
do {
	auto created = Element.make(tagName, childInfo, childInfo2);
	return parentNode.insertAfter(this, created);
}

/// Inserts an existing element directly after this one in the parent; returns what `insertAfter` returns.
Element addSibling(Element e) {
	return parentNode.insertAfter(this, e);
}

/// Appends an existing element as a child; same as calling `appendChild`.
Element addChild(Element e) {
	return this.appendChild(e);
}

/// Convenience function to append text intermixed with other children.
/// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), ".");
///  or div.addChildren("Hello, ", user.name, "!");
///
/// Each argument whose type converts to Element is passed to `appendChild`; each
/// string argument is passed to `appendText`. Any other argument type is rejected
/// at compile time.
///
/// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping.
void addChildren(T...)(T t) {
	// imported locally so the template does not depend on the module's import list
	import std.conv : to;
	import std.traits : isSomeString;

	foreach(item; t) {
		// `item` is a value; the checks must be made on its type. Testing the value
		// itself inside is(...) is always false, which made every call fall through
		// to the static assert.
		static if(is(typeof(item) : Element))
			appendChild(item);
		else static if(isSomeString!(typeof(item)))
			appendText(to!string(item));
		else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren");
	}
}

/// Creates a new element with the given tag via [Element.make], puts `firstChild`
/// inside it, appends the new element to this one, and returns the new element.
Element addChild(string tagName, Element firstChild, string info2 = null)
in {
	assert(firstChild !is null);
}
out(ret) {
	assert(ret !is null);
	assert(ret.parentNode is this);
	assert(firstChild.parentNode is ret);

	assert(ret.parentDocument is this.parentDocument);
	//assert(firstChild.parentDocument is this.parentDocument);
}
do {
	auto e = Element.make(tagName, "", info2);
	e.appendChild(firstChild);
	this.appendChild(e);
	return e;
}

/// Creates a new element with the given tag via [Element.make], appends it to this
/// one, then sets its innerHTML from `innerHtml.source`. Returns the new element.
Element addChild(string tagName, in Html innerHtml, string info2 = null)
in {
}
out(ret) {
	assert(ret !is null);
	assert((cast(DocumentFragment) this !is null) || (ret.parentNode is this), ret.toString);// e.parentNode ? e.parentNode.toString : "null");
	assert(ret.parentDocument is this.parentDocument);
}
do {
	auto e = Element.make(tagName, "", info2);
	// attached before the html is assigned, as in the original ordering
	this.appendChild(e);
	e.innerHTML = innerHtml.source;
	return e;
}


/// Appends each element of the array, in order, with `appendChild`.
void appendChildren(Element[] children) {
	foreach(ele; children)
		appendChild(ele);
}

///.
void reparent(Element newParent)
in {
	assert(newParent !is null);
	assert(parentNode !is null);
}
out {
	assert(this.parentNode is newParent);
	//assert(isInArray(this, newParent.children));
}
do {
	// detach from the current parent first, then attach to the new one
	auto oldParent = parentNode;
	oldParent.removeChild(this);
	newParent.appendChild(this);
}

/**
	Strips this tag out of the document, putting its inner html
	as children of the parent.

	For example, given: `<p>hello <b>there</b></p>`, if you
	call `stripOut` on the `b` element, you'll be left with
	`<p>hello there</p>`.

	The idea here is to make it easy to get rid of garbage
	markup you aren't interested in.
*/
void stripOut()
in {
	assert(parentNode !is null);
}
out {
	assert(parentNode is null);
	assert(children.length == 0);
}
do {
	// detach every child from this element before handing them to the parent
	foreach(child; children)
		child.parentNode = null; // remove the parent

	if(children.length == 0)
		parentNode.removeChild(this);
	else
		parentNode.replaceChild(this, this.children);

	this.children.length = 0; // we reparented them all above
}

/// shorthand for `this.parentNode.removeChild(this)` with `parentNode` `null` check
/// if the element already isn't in a tree, it does nothing.
/// Always returns this element.
Element removeFromTree()
in {

}
out(var) {
	assert(this.parentNode is null);
	assert(var is this);
}
do {
	if(this.parentNode !is null)
		this.parentNode.removeChild(this);

	return this;
}

/++
	Wraps this element inside the given element.
	It's like `this.replaceWith(what); what.appendChild(this);`

	Given: `<b>cool</b>`, if you call `b.wrapIn(new Link("site.com", "my site is "));`
	you'll end up with: `<a href="site.com">my site is <b>cool</b></a>`.
+/
Element wrapIn(Element what)
in {
	assert(what !is null);
}
out(ret) {
	assert(this.parentNode is what);
	assert(ret is what);
}
do {
	// put the wrapper where this element was, then move this element inside it
	this.replaceWith(what);
	what.appendChild(this);

	return what;
}

/// Replaces this element with something else in the tree.
/// `e` is first removed from wherever it currently lives; `e` is returned.
Element replaceWith(Element e)
in {
	assert(this.parentNode !is null);
}
do {
	e.removeFromTree();
	auto holder = this.parentNode;
	holder.replaceChild(this, e);
	return e;
}

/**
	Splits the className into an array of each class given
	(split on single spaces).
*/
string[] classNames() const {
	return split(className(), " ");
}

/**
	Fetches the first consecutive text nodes concatenated together.


	`firstInnerText` of `<example>some text<span>more text</span></example>` is `some text`. It stops at the first child tag encountered.

	See_also: [directText], [innerText]
*/
string firstInnerText() const {
	string collected;
	foreach(node; children) {
		if(node.nodeType == NodeType.Text)
			collected ~= node.nodeValue();
		else
			break; // first non-text child ends the run
	}
	return collected;
}


/**
	Returns the text directly under this element.


	Unlike [innerText], it does not recurse, and unlike [firstInnerText], it continues
	past child tags. So, `<example>some <b>bold</b> text</example>`
	will return `some  text` because it only gets the text, skipping non-text children.

	See_also: [firstInnerText], [innerText]
*/
@property string directText() {
	string collected;
	foreach(node; children) {
		if(node.nodeType != NodeType.Text)
			continue;
		collected ~= node.nodeValue();
	}

	return collected;
}

/**
	Sets the direct text, without modifying other child nodes.


	Unlike [innerText], this does *not* remove existing elements in the element.

	It only replaces the first text node it sees.
1889 1890 If there are no text nodes, it calls [appendText]. 1891 1892 So, given `<div><img />text here</div>`, it will keep the `<img />`, and replace the `text here`. 1893 */ 1894 @property void directText(string text) { 1895 foreach(e; children) { 1896 if(e.nodeType == NodeType.Text) { 1897 auto it = cast(TextNode) e; 1898 it.contents = text; 1899 return; 1900 } 1901 } 1902 1903 appendText(text); 1904 } 1905 1906 // do nothing, this is primarily a virtual hook 1907 // for links and forms 1908 void setValue(string field, string value) { } 1909 1910 1911 // this is a thing so i can remove observer support if it gets slow 1912 // I have not implemented all these yet 1913 private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) { 1914 if(parentDocument is null) return; 1915 DomMutationEvent me; 1916 me.operation = operation; 1917 me.target = this; 1918 me.relatedString = s1; 1919 me.relatedString2 = s2; 1920 me.related = r; 1921 me.related2 = r2; 1922 parentDocument.dispatchMutationEvent(me); 1923 } 1924 1925 // putting all the members up front 1926 1927 // this ought to be private. don't use it directly. 1928 Element[] children; 1929 1930 /// The name of the tag. Remember, changing this doesn't change the dynamic type of the object. 1931 string tagName; 1932 1933 /// This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead. 1934 string[string] attributes; 1935 1936 /// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here. 1937 /// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list. 1938 private bool selfClosed; 1939 1940 private DomParent parent_; 1941 1942 /// Get the parent Document object that contains this element. 1943 /// It may be null, so remember to check for that. 
@property inout(Document) parentDocument() inout {
	if(this.parent_ is null)
		return null;

	// climb through element parents; `prev` ends up as the topmost element,
	// whose parent_ is then either a Document or not an element at all
	auto p = cast() this.parent_.asElement;
	auto prev = cast() this;
	while(p) {
		prev = p;
		if(p.parent_ is null)
			return null; // the chain ends without ever reaching a document
		p = cast() p.parent_.asElement;
	}
	return cast(inout) prev.parent_.asDocument;
}

// Directly overwrites the parent link with the given document.
deprecated @property void parentDocument(Document doc) {
	parent_ = doc;
}

/// Returns the parent element, or null if there is none (including when the
/// parent is a Document rather than an Element).
///
/// A DocumentFragment parent is skipped: the fragment's own element parent is
/// returned instead, or null when the fragment is not attached to anything.
inout(Element) parentNode() inout {
	if(parent_ is null)
		return null;

	auto p = parent_.asElement;

	if(cast(DocumentFragment) p) {
		// an unattached fragment has no parent_; report "no parent"
		// instead of dereferencing null
		if(p.parent_ is null)
			return null;
		return p.parent_.asElement;
	}

	return p;
}

//protected
// Sets the parent link to the given element (may be null) and returns it.
Element parentNode(Element e) {
	parent_ = e;
	return e;
}

// these are here for event handlers. Don't forget that this library never fires events.
// (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.)

version(dom_with_events) {
	EventHandler[][string] bubblingEventHandlers;
	EventHandler[][string] capturingEventHandlers;
	EventHandler[string] defaultEventHandlers;

	// Registers a handler under the event name; a leading "on" is dropped,
	// so "onclick" and "click" land in the same list.
	void addEventListener(string event, EventHandler handler, bool useCapture = false) {
		if(event.length > 2 && event[0..2] == "on")
			event = event[2 .. $];

		if(useCapture)
			capturingEventHandlers[event] ~= handler;
		else
			bubblingEventHandlers[event] ~= handler;
	}
}


// and now methods

/++
	Convenience function to try to do the right thing for HTML. This is the main way I create elements.

	History:
		On February 8, 2021, the `selfClosedElements` parameter was added. Previously, it used a private
		immutable global list for HTML. It still defaults to the same list, but you can change it now via
		the parameter.
+/
static Element make(string tagName, string childInfo = null, string childInfo2 = null, const string[] selfClosedElements = htmlSelfClosedElements) {
	bool isSelfClosed = tagName.isInArray(selfClosedElements);

	// text nodes are returned as constructed; none of the fix-ups below apply to them
	if(tagName == "#text")
		return new TextNode(null, childInfo);

	// want to create the right kind of object for the given tag...
	Element created;
	switch(tagName) {
		case "table":
			created = new Table(null);
		break;
		case "a":
			created = new Link(null);
		break;
		case "form":
			created = new Form(null);
		break;
		case "tr":
			created = new TableRow(null);
		break;
		case "td", "th":
			created = new TableCell(null, tagName);
		break;
		default:
			created = new Element(null, tagName, null, isSelfClosed); // parent document should be set elsewhere
	}

	// make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too
	created.tagName = tagName;
	created.selfClosed = isSelfClosed;

	// without a first info string there is nothing more to fill in
	if(childInfo is null)
		return created;

	switch(tagName) {
		/* html5 convenience tags */
		case "audio":
			if(childInfo.length)
				created.addChild("source", childInfo);
			if(childInfo2 !is null)
				created.appendText(childInfo2);
		break;
		case "source":
			created.src = childInfo;
			if(childInfo2 !is null)
				created.type = childInfo2;
		break;
		/* regular html 4 stuff */
		case "img":
			created.src = childInfo;
			if(childInfo2 !is null)
				created.alt = childInfo2;
		break;
		case "link":
			created.href = childInfo;
			if(childInfo2 !is null)
				created.rel = childInfo2;
		break;
		case "option":
			created.innerText = childInfo;
			if(childInfo2 !is null)
				created.value = childInfo2;
		break;
		case "input":
			created.type = "hidden";
			created.name = childInfo;
			if(childInfo2 !is null)
				created.value = childInfo2;
		break;
		case "button":
			created.innerText = childInfo;
			if(childInfo2 !is null)
				created.type = childInfo2;
		break;
		case "a":
			created.innerText = childInfo;
			if(childInfo2 !is null)
				created.href = childInfo2;
		break;
		case "script", "style":
			created.innerRawSource = childInfo;
		break;
		case "meta":
			created.name = childInfo;
			if(childInfo2 !is null)
				created.content = childInfo2;
		break;
		/* generically, assume we were passed text and perhaps class */
		default:
			created.innerText = childInfo;
			if(childInfo2.length)
				created.className = childInfo2;
	}

	return created;
}

/// Builds the element as the string overload does (with an empty, non-null first
/// info string) and then sets its innerHTML from `innerHtml.source`.
static Element make(string tagName, in Html innerHtml, string childInfo2 = null) {
	// FIXME: childInfo2 is ignored when info1 is null
	auto built = Element.make(tagName, "not null"[0..0], childInfo2);
	built.innerHTML = innerHtml.source;
	return built;
}

/// Builds the element with a null first info string and appends `child` to it.
static Element make(string tagName, Element child, string childInfo2 = null) {
	auto built = Element.make(tagName, cast(string) null, childInfo2);
	built.appendChild(child);
	return built;
}


/// Generally, you don't want to call this yourself - use Element.make or document.createElement instead.
this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) {
	tagName = _tagName;
	selfClosed = _selfClosed;
	if(_attributes !is null)
		attributes = _attributes;

	version(dom_node_indexes)
		this.dataset.nodeIndex = to!string(&(this.attributes));

	// tag names may not contain a space
	assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid");
}

/++
	Convenience constructor when you don't care about the parentDocument. Note this might break things on the document.
	Note also that without a parent document, elements are always in strict, case-sensitive mode.

	History:
		On February 8, 2021, the `selfClosedElements` parameter was added. It defaults to the same behavior as
		before: using the hard-coded list of HTML elements, but it can now be overridden.
If you use
		[Document.createElement], it will use the list set for the current document. Otherwise, you can pass
		something here if you like.
+/
this(string _tagName, string[string] _attributes = null, const string[] selfClosedElements = htmlSelfClosedElements) {
	tagName = _tagName;
	if(_attributes !is null)
		attributes = _attributes;
	// self-closed-ness is looked up from the tag name rather than passed in
	selfClosed = tagName.isInArray(selfClosedElements);

	// this is meant to reserve some memory. It makes a small, but consistent improvement.
	//children.length = 8;
	//children.length = 0;

	version(dom_node_indexes)
		this.dataset.nodeIndex = to!string(&(this.attributes));
}

// Bare constructor: sets nothing except the optional debugging node index.
private this(Document _parentDocument) {
	version(dom_node_indexes)
		this.dataset.nodeIndex = to!string(&(this.attributes));
}


/* *******************************
       Navigating the DOM
*********************************/

/// Returns the first child of this element. If it has no children, returns null.
/// Remember, text nodes are children too.
@property Element firstChild() {
	return children.length ? children[0] : null;
}

/// Returns the last child of this element, or null if it has no children.
@property Element lastChild() {
	return children.length ? children[$ - 1] : null;
}

/// UNTESTED
/// The next node you would encounter if you were reading it in the source:
/// the first child if there is one, otherwise the next sibling of this node or
/// of its nearest ancestor that has one. Returns null at the end of the tree.
Element nextInSource() {
	if(auto child = firstChild)
		return child;

	// No children: climb from this node upward until something has a following sibling.
	// The cursor must advance each step; the old loop re-tested the same parent forever
	// whenever that parent had no next sibling.
	for(Element cursor = this; cursor !is null; cursor = cursor.parentNode) {
		if(auto following = cursor.nextSibling)
			return following;
	}

	return null;
}

/// UNTESTED
/// The node that comes immediately before this one when reading the source; the
/// inverse of [nextInSource]. That is the deepest last descendant of the previous
/// sibling, or the parent when there is no previous sibling. Returns null for a
/// node with neither.
Element previousInSource() {
	auto before = previousSibling;
	if(before is null)
		return parentNode; // first child: the parent's opening tag precedes us (was: parent's *last* child)

	// everything inside the previous sibling sits between it and us in the source
	while(before.lastChild !is null)
		before = before.lastChild;
	return before;
}

///.
@property Element previousElementSibling() {
	return previousSibling("*");
}

/++
	Returns the closest sibling that comes before this node under the same parent,
	or null if there is none (or if this node has no parent).

	Params:
		tagName = `null` accepts any node, `"*"` accepts anything that is not a
		text node, and any other value must equal the sibling's tag name.
+/
@property Element previousSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;

	Element found = null;
	foreach(sibling; this.parentNode.childNodes) {
		if(sibling is this)
			break; // everything from here on comes after us
		const bool wildcardHit = tagName == "*" && sibling.nodeType != NodeType.Text;
		// keep overwriting so the last match before `this` wins
		if(wildcardHit || tagName is null || sibling.tagName == tagName)
			found = sibling;
	}

	return found;
}

/// Returns the next sibling that is not a text node, or null.
@property Element nextElementSibling() {
	return nextSibling("*");
}

/++
	Returns the first sibling that comes after this node under the same parent,
	or null if there is none (or if this node has no parent).

	Params:
		tagName = `null` accepts any node, `"*"` accepts anything that is not a
		text node, and any other value must equal the sibling's tag name.
+/
@property Element nextSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;

	bool passedSelf = false;
	foreach(sibling; this.parentNode.childNodes) {
		if(sibling is this) {
			passedSelf = true;
			continue;
		}
		if(!passedSelf)
			continue; // still before us

		const bool wildcardHit = tagName == "*" && sibling.nodeType != NodeType.Text;
		if(wildcardHit || tagName is null || sibling.tagName == tagName)
			return sibling;
	}

	return null;
}


/++
	Gets the nearest node, going up the chain, with the given tagName.

	When `tagName` is null it defaults from `T`: `"form"` for [Form], `"table"`
	for [Table], `"a"` for [Link]; for any other `T` the immediate parent is used.

	Returns: the matching ancestor cast to `T`. With `T == Element` this is null
	when no ancestor matches.

	Throws: [ElementNotFoundException] when `T` is not `Element` and no ancestor
	matched or the match is not a `T`.
+/
T getParent(T = Element)(string tagName = null) if(is(T : Element)) {
	if(tagName is null) {
		static if(is(T == Form))
			tagName = "form";
		else static if(is(T == Table))
			tagName = "table";
		else static if(is(T == Link))
			tagName = "a"; // was `tagName == "a";`, a comparison that assigned nothing
	}

	auto par = this.parentNode;
	while(par !is null) {
		if(tagName is null || par.tagName == tagName)
			break;
		par = par.parentNode;
	}

	// static if introduces no scope, so `t` is visible to the return below either way
	static if(!is(T == Element)) {
		auto t = cast(T) par;
		if(t is null)
			throw new ElementNotFoundException("", tagName ~ " parent not found", this);
	} else
		auto t = par;

	return t;
}

///.
Element getElementById(string id) {
	// FIXME: I use this function a lot, and it's kinda slow
	// not terribly slow, but not great.
	// linear scan of `tree`; the first node whose id attribute equals `id` wins
	foreach(candidate; tree) {
		if(candidate.id != id)
			continue;
		return candidate;
	}
	return null;
}

/++
	Returns a child element that matches the given `selector`.

	Note: you can give multiple selectors, separated by commas.
	It will return the first match it finds.
+/
@scriptable
Element querySelector(string selector) {
	auto compiled = Selector(selector);
	foreach(candidate; tree) {
		if(compiled.matchesElement(candidate))
			return candidate;
	}
	return null;
}

/// a more standards-compliant alias for getElementsBySelector
@scriptable
Element[] querySelectorAll(string selector) {
	return getElementsBySelector(selector);
}

/// If the element matches the given selector. Previously known as `matchesSelector`.
@scriptable
bool matches(string selector) {
	/+
	bool caseSensitiveTags = true;
	if(parentDocument && parentDocument.loose)
		caseSensitiveTags = false;
	+/

	auto compiled = Selector(selector);
	return compiled.matchesElement(this);
}

/// Returns itself or the closest parent that matches the given selector, or null if none found
/// See_also: https://developer.mozilla.org/en-US/docs/Web/API/Element/closest
@scriptable
Element closest(string selector) {
	// start with this element itself, then walk up through the ancestors
	for(Element cursor = this; cursor !is null; cursor = cursor.parentNode) {
		if(cursor.matches(selector))
			return cursor;
	}
	return null;
}

/**
	Returns elements that match the given CSS selector

	* -- all, default if nothing else is there

	tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector

	It is all additive

	OP

	space = descendant
	>     = direct descendant
	+     = sibling (E+F Matches any F element immediately preceded by a sibling element E)

	[foo]        Foo is
present as an attribute
	[foo="warning"]   Matches any E element whose "foo" attribute value is exactly equal to "warning".
	E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning"
	E[lang|="en"]     Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en".

	[item$=sdas] ends with
	[item^=sdsad] begins with

	Quotes are optional here.

	Pseudos:
		:first-child
		:last-child
		:link (same as a[href] for our purposes here)


	There can be commas separating the selector. A comma separated list result is OR'd onto the main.



	This ONLY cares about elements. text, etc, are ignored


	There should be two functions: given element, does it match the selector? and given a selector, give me all the elements
*/
Element[] getElementsBySelector(string selector) {
	// FIXME: this function could probably use some performance attention
	// ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app.

	// tags are matched case-sensitively unless we belong to a loose document
	const bool caseSensitiveTags = !(parentDocument && parentDocument.loose);

	// each comma-separated part is run on its own and the results are concatenated in order
	Element[] matched;
	foreach(part; parseSelectorString(selector, caseSensitiveTags))
		matched ~= part.getElements(this);
	return matched;
}

/// Returns the elements matched by the selector `.` followed by `cn`.
Element[] getElementsByClassName(string cn) {
	// is this correct?
	return getElementsBySelector("." ~ cn);
}

///.
Element[] getElementsByTagName(string tag) {
	// in a loose document the requested name is lowercased before comparing
	if(parentDocument && parentDocument.loose)
		tag = tag.toLower();

	Element[] hits;
	foreach(candidate; tree) {
		if(candidate.tagName == tag)
			hits ~= candidate;
	}
	return hits;
}


/* *******************************
          Attributes
*********************************/

/**
	Gets the given attribute value, or null if the
	attribute is not set.

	Note that the returned string is decoded, so it no longer contains any xml entities.
*/
@scriptable
string getAttribute(string name) const {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();
	if(auto found = name in attributes)
		return *found;
	return null;
}

/**
	Sets an attribute. Returns this for easy chaining

	For `href` and `src` (compared case-insensitively), any leading `vbscript:` or
	`javascript:` scheme is removed from the value before it is stored. This is a
	best-effort filter, not a complete URL sanitizer.
*/
@scriptable
Element setAttribute(string name, string value) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	// ASCII-only "starts with, ignoring case" on the original bytes, so the byte
	// offsets used for slicing below always refer to the string actually being cut
	static bool startsWithScheme(string s, string scheme) {
		if(s.length < scheme.length)
			return false;
		foreach(i, expected; scheme) {
			char c = s[i];
			if(c >= 'A' && c <= 'Z')
				c = cast(char)(c | 0x20);
			if(c != expected)
				return false;
		}
		return true;
	}

	// I never use this shit legitimately and neither should you
	auto it = name.toLower();
	if(it == "href" || it == "src") {
		// Work on a stripped copy. The old code tested the stripped copy but sliced the
		// unstripped value, so leading whitespace made it cut at the wrong place.
		auto cleaned = value.strip();
		bool removedScheme = false;
		// Loop so stacked schemes ("vbscript:javascript:...") are all removed; a single
		// pass would leave the inner one behind.
		for(;;) {
			if(startsWithScheme(cleaned, "vbscript:"))
				cleaned = cleaned[9 .. $].strip();
			else if(startsWithScheme(cleaned, "javascript:"))
				cleaned = cleaned[11 .. $].strip();
			else
				break;
			removedScheme = true;
		}
		// values without such a scheme are stored exactly as given
		if(removedScheme)
			value = cleaned;
	}

	attributes[name] = value;

	sendObserverEvent(DomMutationOperations.setAttribute, name, value);

	return this;
}

/**
	Returns if the attribute exists.
*/
@scriptable
bool hasAttribute(string name) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	return (name in attributes) !is null;
}

/**
	Removes the given attribute from the element.
*/
@scriptable
Element removeAttribute(string name)
	out(ret) {
		assert(ret is this);
	}
do {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	// removing a key that is not in the table is a harmless no-op
	attributes.remove(name);

	// observers are told even when there was nothing to remove
	sendObserverEvent(DomMutationOperations.removeAttribute, name);
	return this;
}

/**
	Gets the class attribute's contents. Returns
	an empty string if it has no class.
*/
@property string className() const {
	auto current = getAttribute("class");
	return current is null ? "" : current;
}

/// Sets the class attribute to `c`. Returns this for chaining.
@property Element className(string c) {
	setAttribute("class", c);
	return this;
}

/**
	Provides easy access to common HTML attributes, object style.

	---
	auto a = Element.make("a");
	a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html");
	string where = a.href; // same as a.getAttribute("href");
	---

	Only names accepted by `isConvenientAttribute` are available this way.
	Passing null (the default) reads without writing.
*/
@property string opDispatch(string name)(string v = null) if(isConvenientAttribute(name)) {
	if(v !is null)
		setAttribute(name, v);
	return getAttribute(name);
}

/**
	Old access to attributes. Use [attrs] instead.

	DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions.
	so I want to remove it. A small whitelist of attributes is still allowed, but others are not.

	Instead, use element.attrs.attribute, element.attrs["attribute"],
	or element.getAttribute("attribute")/element.setAttribute("attribute").
*/
@property string opDispatch(string name)(string v = null) if(!isConvenientAttribute(name)) {
	// any use of a non-whitelisted name is rejected at compile time
	static assert(0, "Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName");
}

/*
// this would be nice for convenience, but it broke the getter above.
@property void opDispatch(string name)(bool boolean) if(name != "popFront") {
	if(boolean)
		setAttribute(name, name);
	else
		removeAttribute(name);
}
*/

/**
	Returns the element's children.
*/
@property const(Element[]) childNodes() const {
	return children;
}

/// Mutable version of the same
@property Element[] childNodes() { // FIXME: the above should be inout
	return children;
}

/++
	HTML5's dataset property. It is an alternate view into attributes with the data- prefix.
	Given `<a data-my-property="cool" />`, we get `assert(a.dataset.myProperty == "cool");`

	A fresh [DataSet] wrapping this element is built on every call.
+/
@property DataSet dataset() {
	return DataSet(this);
}

/++
	Gives dot/opIndex access to attributes
	---
	ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo")
	---

	A fresh [AttributeSet] wrapping this element is built on every call.
+/
@property AttributeSet attrs() {
	return AttributeSet(this);
}

/++
	Provides both string and object style (like in Javascript) access to the style attribute.

	---
	element.style.color = "red"; // translates into setting `color: red;` in the `style` attribute
	---
+/
@property ElementStyle style() {
	return ElementStyle(this);
}

/++
	This sets the style attribute with a string, then returns the object-style view of it.
+/
@property ElementStyle style(string s) {
	this.setAttribute("style", s);
	return this.style;
}

// Currently does nothing: the whole body is commented out.
private void parseAttributes(string[] whichOnes = null) {
/+
	if(whichOnes is null)
		whichOnes = attributes.keys;
	foreach(attr; whichOnes) {
		switch(attr) {
			case "id":

			break;
			case "class":

			break;
			case "style":

			break;
			default:
				// we don't care about it
		}
	}
+/
}


// if you change something here, it won't apply... FIXME const?
// but changing it would be nice if it applies to the style attribute too though you should use style there.

// the next few methods are for implementing interactive kind of things
private CssStyle _computedStyle;

/// Don't use this.
@property CssStyle computedStyle() {
	// built once, on first use, and then kept
	if(_computedStyle !is null)
		return _computedStyle;

	auto css = this.getAttribute("style");
	/* we'll treat shitty old html attributes as css here */
	if(this.hasAttribute("width"))
		css ~= "; width: " ~ this.attrs.width;
	if(this.hasAttribute("height"))
		css ~= "; height: " ~ this.attrs.height;
	if(this.hasAttribute("bgcolor"))
		css ~= "; background-color: " ~ this.attrs.bgcolor;
	// `text` only counts as a colour on <body>
	if(this.tagName == "body" && this.hasAttribute("text"))
		css ~= "; color: " ~ this.attrs.text;
	if(this.hasAttribute("color"))
		css ~= "; color: " ~ this.attrs.color;
	/* done */

	_computedStyle = new CssStyle(null, css); // gives at least something to work with
	return _computedStyle;
}

/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
version(browser) {
	void* expansionHook; ///ditto
	int offsetWidth; ///ditto
	int offsetHeight; ///ditto
	int offsetLeft; ///ditto
	int offsetTop; ///ditto
	Element offsetParent; ///ditto
	bool hasLayout; ///ditto
	int zIndex; ///ditto

	///ditto
	int absoluteLeft() {
		// our own offset plus that of every element up the offsetParent chain
		int total = offsetLeft;
		for(auto ancestor = offsetParent; ancestor; ancestor = ancestor.offsetParent)
			total += ancestor.offsetLeft;
		return total;
	}

	///ditto
	int absoluteTop() {
		// our own offset plus that of every element up the offsetParent chain
		int total = offsetTop;
		for(auto ancestor = offsetParent; ancestor; ancestor = ancestor.offsetParent)
			total += ancestor.offsetTop;
		return total;
	}
}

// Back to the regular dom functions

public:


/* *******************************
          DOM Mutation
*********************************/

/// Removes all inner content from the tag; all child text and elements are gone.
void removeAllChildren()
	out {
		assert(this.children.length == 0);
	}
do {
	// detach every child before dropping the list
	foreach(orphan; children)
		orphan.parentNode = null;
	children = null;
}

/// Inserts `e` into our parent, right after this element. Returns `e`.
/// History: added June 13, 2020
Element appendSibling(Element e) {
	parentNode.insertAfter(this, e);
	return e;
}

/// Inserts `e` into our parent, right before this element. Returns `e`.
/// History: added June 13, 2020
Element prependSibling(Element e) {
	parentNode.insertBefore(this, e);
	return e;
}


/++
	Appends the given element to this one. If it already has a parent, it is removed from that tree and moved to this one.

	See_also: https://developer.mozilla.org/en-US/docs/Web/API/Node/appendChild

	History:
		Prior to 1 Jan 2020 (git tag v4.4.1 and below), it required that the given element must not have a parent already. This was in violation of standard, so it changed the behavior to remove it from the existing parent and instead move it here.
+/
Element appendChild(Element e)
	in {
		assert(e !is null);
	}
	out (ret) {
		assert((cast(DocumentFragment) this !is null) || (e.parentNode is this), e.toString);// e.parentNode ? e.parentNode.toString : "null");
		assert(e.parentDocument is this.parentDocument);
		assert(e is ret);
	}
do {
	// take it away from wherever it currently lives
	if(e.parentNode !is null)
		e.parentNode.removeChild(e);

	// an element with children can no longer be self-closed
	selfClosed = false;

	// a fragment contributes its children rather than itself
	auto frag = cast(DocumentFragment) e;
	if(frag is null)
		children ~= e;
	else
		children ~= frag.children;

	e.parentNode = this;

	/+
	foreach(item; e.tree)
		item.parentDocument = this.parentDocument;
	+/

	sendObserverEvent(DomMutationOperations.appendChild, null, null, e);

	return e;
}

/// Inserts the second element to this node, right before the first param
Element insertBefore(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);

		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
do {
	foreach(idx, existing; children) {
		if(existing !is where)
			continue;

		auto frag = cast(DocumentFragment) what;
		if(frag is null) {
			children = children[0 .. idx] ~ what ~ children[idx .. $];
		} else {
			// splice in the fragment's children and adopt each of them
			children = children[0 .. idx] ~ frag.children ~ children[idx .. $];
			foreach(moved; frag.children)
				moved.parentNode = this;
		}
		what.parentNode = this;
		return what;
	}

	// `where` was not found among our children: nothing is inserted
	return what;
}

/++
	Inserts the given element `what` as a child of `this`, immediately after the existing child `where`.
+/
Element insertAfter(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);
		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
do {
	foreach(idx, existing; children) {
		if(existing !is where)
			continue;

		const after = idx + 1;
		if(auto frag = cast(DocumentFragment) what) {
			// splice in the fragment's children and adopt each of them
			children = children[0 .. after] ~ what.children ~ children[after .. $];
			foreach(moved; frag.children)
				moved.parentNode = this;
		} else {
			children = children[0 .. after] ~ what ~ children[after .. $];
		}
		what.parentNode = this;
		return what;
	}

	// `where` was not found among our children: nothing is inserted
	return what;
}

/// swaps one child for a new thing. Returns the old child which is now parentless.
Element swapNode(Element child, Element replacement)
	in {
		assert(child !is null);
		assert(replacement !is null);
		assert(child.parentNode is this);
	}
	out(ret) {
		assert(ret is child);
		assert(ret.parentNode is null);
		assert(replacement.parentNode is this);
		assert(replacement.parentDocument is this.parentDocument);
	}
do {
	foreach(ref slot; this.children) {
		if(slot !is child)
			continue;
		// detach the old occupant, then overwrite its slot in place
		child.parentNode = null;
		slot = replacement;
		replacement.parentNode = this;
		return child;
	}
	// the in contract says child is ours, so it must have been in the list
	assert(0);
}


/++
	Appends the given text to the node.


	Calling `e.appendText(" hi")` on `<example>text <b>bold</b></example>`
	yields `<example>text <b>bold</b> hi</example>`.
2867 2868 See_Also: 2869 [firstInnerText], [directText], [innerText], [appendChild] 2870 +/ 2871 @scriptable 2872 Element appendText(string text) { 2873 Element e = new TextNode(parentDocument, text); 2874 appendChild(e); 2875 return this; 2876 } 2877 2878 /++ 2879 Returns child elements which are of a tag type (excludes text, comments, etc.). 2880 2881 2882 childElements of `<example>text <b>bold</b></example>` is just the `<b>` tag. 2883 2884 Params: 2885 tagName = filter results to only the child elements with the given tag name. 2886 +/ 2887 @property Element[] childElements(string tagName = null) { 2888 Element[] ret; 2889 foreach(c; children) 2890 if(c.nodeType == 1 && (tagName is null || c.tagName == tagName)) 2891 ret ~= c; 2892 return ret; 2893 } 2894 2895 /++ 2896 Appends the given html to the element, returning the elements appended 2897 2898 2899 This is similar to `element.innerHTML += "html string";` in Javascript. 2900 +/ 2901 @scriptable 2902 Element[] appendHtml(string html) { 2903 Document d = new Document("<root>" ~ html ~ "</root>"); 2904 return stealChildren(d.root); 2905 } 2906 2907 2908 ///. 
void insertChildAfter(Element child, Element where)
	in {
		assert(child !is null);
		assert(where !is null);
		assert(where.parentNode is this);
		assert(!selfClosed);
		//assert(isInArray(where, children));
	}
	out {
		assert(child.parentNode is this);
		assert(where.parentNode is this);
		//assert(isInArray(where, children));
		//assert(isInArray(child, children));
	}
do {
	foreach(idx, existing; children) {
		if(existing !is where)
			continue;

		const after = idx + 1;
		if(auto frag = cast(DocumentFragment) child) {
			// a fragment contributes its children rather than itself
			children = children[0 .. after] ~ child.children ~ children[after .. $];
			//foreach(child; frag.children)
				//child.parentNode = this;
		} else {
			children = children[0 .. after] ~ child ~ children[after .. $];
		}
		child.parentNode = this;
		break;
	}
}

/++
	Reparents all the child elements of `e` to `this`, leaving `e` childless.

	Params:
		e = the element whose children you want to steal
		position = an existing child element in `this` before which you want the stolen children to be inserted. If `null`, or if it is not actually one of our children, the stolen children are appended at the end of our current children.

	Returns: the stolen children, in their original order.
+/
Element[] stealChildren(Element e, Element position = null)
	in {
		assert(!selfClosed);
		assert(e !is null);
		//if(position !is null)
			//assert(isInArray(position, children));
	}
	out (ret) {
		assert(e.children.length == 0);
		// all the parentNode is this checks fail because DocumentFragments do not appear in the parent tree, they are invisible...
		version(none)
		debug foreach(child; ret) {
			assert(child.parentNode is this);
			assert(child.parentDocument is this.parentDocument);
		}
	}
do {
	foreach(c; e.children) {
		c.parentNode = this;
	}

	// Find where to splice; default to the end. Previously, a `position` that was not
	// among our children meant the nodes were reparented and removed from `e` but never
	// added to our list, so they were lost.
	size_t insertAt = children.length;
	if(position !is null) {
		foreach(i, child; children) {
			if(child is position) {
				insertAt = i;
				break;
			}
		}
	}

	if(insertAt == children.length)
		children ~= e.children;
	else
		children = children[0 .. insertAt] ~
			e.children ~
			children[insertAt .. $];

	// keep a slice of the old list to return, then empty the donor
	auto ret = e.children[];
	e.children.length = 0;

	return ret;
}

/// Puts the given element first in our children list. The given element must not have a parent already.
Element prependChild(Element e)
	in {
		assert(e.parentNode is null);
		assert(!selfClosed);
	}
	out {
		assert(e.parentNode is this);
		assert(e.parentDocument is this.parentDocument);
		assert(children[0] is e);
	}
do {
	if(auto frag = cast(DocumentFragment) e) {
		// a fragment contributes its children, each of which is adopted
		children = e.children ~ children;
		foreach(child; frag.children)
			child.parentNode = this;
	} else
		children = e ~ children;
	e.parentNode = this;
	return e;
}


/**
	Returns a string containing all child elements, formatted such that it could be pasted into
	an XML file.

	Params:
		where = appender to write into; only the part written by this call is returned.
*/
@property string innerHTML(Appender!string where = appender!string()) const {
	if(children is null)
		return "";

	// remember where our output starts so earlier contents of `where` are excluded
	auto start = where.data.length;

	foreach(child; children) {
		assert(child !is null);

		child.writeToAppender(where);
	}

	return where.data[start .. $];
}

/**
	Takes some html and replaces the element's children with the tree made from the string.
*/
@property Element innerHTML(string html, bool strict = false) {
	if(html.length == 0) {
		// I often say innerHTML = ""; as a shortcut to clear it out,
		// so let's optimize that slightly.
		removeAllChildren();
		return this;
	}

	// there is content coming, so we can no longer be self-closed
	selfClosed = false;

	// parse inside a throwaway wrapper element in a scratch document
	auto scratch = new Document();
	scratch.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document

	// adopt the wrapper's children as our own...
	children = scratch.root.children;
	foreach(adopted; children)
		adopted.parentNode = this;

	// ...and detach them from the scratch tree
	scratch.root.children = null;

	return this;
}

/// ditto
@property Element innerHTML(Html html) {
	return this.innerHTML = html.source;
}

/**
	Replaces this node with the given html string, which is parsed

	Note: this invalidates the this reference, since it is removed
	from the tree.

	Returns the new children that replace this.
*/
@property Element[] outerHTML(string html) {
	auto scratch = new Document();
	scratch.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness

	// take the parsed nodes as our children first...
	children = scratch.root.children;
	foreach(adopted; children)
		adopted.parentNode = this;

	// ...then strip this element out of the tree
	stripOut();

	return scratch.root.children;
}

/++
	Returns all the html for this element, including the tag itself.

	This is equivalent to calling toString().
+/
@property string outerHTML() {
	return this.toString();
}

/// This sets the inner content of the element *without* trying to parse it.
/// You can inject any code in there; this serves as an escape hatch from the dom.
///
/// The only times you might actually need it are for < style > and < script > tags in html.
/// Other than that, innerHTML and/or innerText should do the job.
@property void innerRawSource(string rawSource) {
	auto raw = new RawSource(parentDocument, rawSource);
	// drop whatever was here; the single raw node becomes the only child
	children.length = 0;
	children ~= raw;
	raw.parentNode = this;
}

/// Replaces the child `find` with `replace`, which must not already have a parent.
/// Returns `replace`. Throws if `find` is not among our children.
Element replaceChild(Element find, Element replace)
	in {
		assert(find !is null);
		assert(find.parentNode is this);
		assert(replace !is null);
		assert(replace.parentNode is null);
	}
	out(ret) {
		assert(ret is replace);
		assert(replace.parentNode is this);
		assert(replace.parentDocument is this.parentDocument);
		assert(find.parentNode is null);
	}
do {
	// FIXME
	//if(auto frag = cast(DocumentFragment) replace)
		//return this.replaceChild(frag, replace.children);
	foreach(ref slot; children) {
		if(slot !is find)
			continue;
		replace.parentNode = this;
		find.parentNode = null;
		// overwrite in place so the position among siblings is kept
		slot = replace;
		return replace;
	}

	throw new Exception("no such child ");// ~  find.toString ~ " among " ~ typeid(this).toString);//.toString ~ " magic \n\n\n" ~ find.parentNode.toString);
}

/**
	Replaces the given element with a whole group.
*/
void replaceChild(Element find, Element[] replace)
	in {
		assert(find !is null);
		assert(replace !is null);
		assert(find.parentNode is this);
		debug foreach(r; replace)
			assert(r.parentNode is null);
	}
	out {
		assert(find.parentNode is null);
		assert(children.length >= replace.length);
		debug foreach(child; children)
			assert(child !is find);
		debug foreach(r; replace)
			assert(r.parentNode is this);
	}
do {
	// replacing with nothing is just a removal
	if(replace.length == 0) {
		removeChild(find);
		return;
	}
	assert(replace.length);

	for(int i = 0; i < children.length; i++) {
		if(children[i] !is find)
			continue;

		children[i].parentNode = null; // this element should now be dead
		// the first replacement takes over the old slot...
		children[i] = replace[0];
		foreach(incoming; replace)
			incoming.parentNode = this;

		// ...and the rest are spliced in right behind it
		children = .insertAfter(children, i, replace[1..$]);
		return;
	}

	throw new Exception("no such child");
}


/**
	Removes the given child from this list.

	Returns the removed element.

	Throws an Exception if `c` is not found among our children.
*/
Element removeChild(Element c)
	in {
		assert(c !is null);
		assert(c.parentNode is this);
	}
	out {
		debug foreach(child; children)
			assert(child !is c);
		assert(c.parentNode is null);
	}
do {
	foreach(idx, existing; children) {
		if(existing !is c)
			continue;
		// rebuild the list without this one slot
		children = children[0 .. idx] ~ children[idx + 1 .. $];
		c.parentNode = null;
		return c;
	}

	throw new Exception("no such child");
}

/// This removes all the children from this element, returning the old list.
Element[] removeChildren()
	out (ret) {
		assert(children.length == 0);
		debug foreach(r; ret)
			assert(r.parentNode is null);
	}
do {
	// hand back a copy, so clearing our own list cannot affect what the caller gets
	Element[] detached = children.dup;
	foreach(orphan; detached)
		orphan.parentNode = null;

	children.length = 0;

	return detached;
}

/**
	Fetch the inside text, with all tags stripped out.

	<p>cool <b>api</b> &amp; code dude<p>
	innerText of that is "cool api & code dude".

	This does not match what real innerText does!
	http://perfectionkills.com/the-poor-misunderstood-innerText/

	It is more like textContent.
*/
@scriptable
@property string innerText() const {
	string collected;
	foreach(child; children) {
		// text nodes give their own value; everything else is recursed into
		collected ~= (child.nodeType == NodeType.Text) ? child.nodeValue() : child.innerText;
	}
	return collected;
}

///
alias textContent = innerText;

/**
	Sets the inside text, replacing all children. You don't
	have to worry about entity encoding.
*/
@scriptable
@property void innerText(string text) {
	selfClosed = false;
	// a single text node becomes the one and only child
	Element textNode = new TextNode(parentDocument, text);
	textNode.parentNode = this;
	children = [textNode];
}

/**
	Strips this node out of the document, replacing it with the given text
*/
@property void outerText(string text) {
	parentNode.replaceChild(this, new TextNode(parentDocument, text));
}

/**
	Same result as innerText; the tag with all inner tags stripped out
*/
@property string outerText() const {
	return innerText;
}


/* *******************************
          Miscellaneous
*********************************/

/// This is a full clone of the element. Alias for cloneNode(true) now. Don't extend it.
@property Element cloned()
/+
	out(ret) {
		// FIXME: not sure why these fail...
		assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length));
		assert(ret.tagName == this.tagName);
	}
do {
+/
{
	return this.cloneNode(true);
}

/// Clones the node. If deepClone is true, clone all inner tags too. If false, only do this tag (and its attributes), but it will have no contents.
Element cloneNode(bool deepClone) {
	// built through make() from the tag name, then given a copy of the attribute table
	auto copy = Element.make(this.tagName);
	copy.attributes = this.attributes.aadup;
	copy.selfClosed = this.selfClosed;

	if(!deepClone)
		return copy;

	// descendants are always cloned deeply
	foreach(child; children)
		copy.appendChild(child.cloneNode(true));

	return copy;
}

/// W3C DOM interface. Only really meaningful on [TextNode] instances, but the interface is present on the base class.
string nodeValue() const {
	return "";
}

// should return int
/// Always 1 on this base class.
@property int nodeType() const {
	return 1;
}


invariant () {
	// checked in debug builds only: a tag name may never contain a space
	debug assert(tagName.indexOf(" ") == -1);

	// commented cuz it gets into recursive pain and eff dat.
	/+
	if(children !is null)
	foreach(child; children) {
	//	assert(parentNode !is null);
		assert(child !is null);
		assert(child.parent_.asElement is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parent_.asElement is null ? "null" : child.parent_.asElement.tagName));
		assert(child !is this);
		//assert(child !is parentNode);
	}
	+/

	/+
	// this isn't helping
	if(parent_ && parent_.asElement) {
		bool found = false;
		foreach(child; parent_.asElement.children)
			if(child is this)
				found = true;
		assert(found, format("%s lists %s as parent, but it is not in children", typeid(this), typeid(this.parent_.asElement)));
	}
	+/

	/+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out
	if(parentNode !is null) {
		// if you have a parent, you should share the same parentDocument; this is appendChild()'s job
		auto lol = cast(TextNode) this;
		assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents);
	}
	+/
	//assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required
	// reason is so you can create these without needing a reference to the document
}

/**
	Turns the whole element, including tag, attributes, and children, into a string which could be pasted into
	an XML file.
*/
override string toString() const {
	return writeToAppender();
}

// Builds the newline-plus-indentation string used by pretty printing.
// Returns null when indentWith is null. With insertComments, the whitespace
// is wrapped in an html comment.
protected string toPrettyStringIndent(bool insertComments, int indentationLevel, string indentWith) const {
	if(indentWith is null)
		return null;

	string pad = insertComments ? "<!--" : "";
	pad ~= "\n";
	foreach(level; 0 .. indentationLevel)
		pad ~= indentWith;
	if(insertComments)
		pad ~= "-->";

	return pad;
}

/++
	Writes out with formatting. Be warned: formatting changes the contents. Use ONLY
	for eyeball debugging.
3380 +/ 3381 string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 3382 3383 // first step is to concatenate any consecutive text nodes to simplify 3384 // the white space analysis. this changes the tree! but i'm allowed since 3385 // the comment always says it changes the comments 3386 // 3387 // actually i'm not allowed cuz it is const so i will cheat and lie 3388 /+ 3389 TextNode lastTextChild = null; 3390 for(int a = 0; a < this.children.length; a++) { 3391 auto child = this.children[a]; 3392 if(auto tn = cast(TextNode) child) { 3393 if(lastTextChild) { 3394 lastTextChild.contents ~= tn.contents; 3395 for(int b = a; b < this.children.length - 1; b++) 3396 this.children[b] = this.children[b + 1]; 3397 this.children = this.children[0 .. $-1]; 3398 } else { 3399 lastTextChild = tn; 3400 } 3401 } else { 3402 lastTextChild = null; 3403 } 3404 } 3405 +/ 3406 3407 auto inlineElements = (parentDocument is null ? null : parentDocument.inlineElements); 3408 3409 const(Element)[] children; 3410 3411 TextNode lastTextChild = null; 3412 for(int a = 0; a < this.children.length; a++) { 3413 auto child = this.children[a]; 3414 if(auto tn = cast(const(TextNode)) child) { 3415 if(lastTextChild !is null) { 3416 lastTextChild.contents ~= tn.contents; 3417 } else { 3418 lastTextChild = new TextNode(""); 3419 lastTextChild.parentNode = cast(Element) this; 3420 lastTextChild.contents ~= tn.contents; 3421 children ~= lastTextChild; 3422 } 3423 } else { 3424 lastTextChild = null; 3425 children ~= child; 3426 } 3427 } 3428 3429 string s = toPrettyStringIndent(insertComments, indentationLevel, indentWith); 3430 3431 s ~= "<"; 3432 s ~= tagName; 3433 3434 // i sort these for consistent output. might be more legible 3435 // but especially it keeps it the same for diff purposes. 
3436 import std.algorithm : sort; 3437 auto keys = sort(attributes.keys); 3438 foreach(n; keys) { 3439 auto v = attributes[n]; 3440 s ~= " "; 3441 s ~= n; 3442 s ~= "=\""; 3443 s ~= htmlEntitiesEncode(v); 3444 s ~= "\""; 3445 } 3446 3447 if(selfClosed){ 3448 s ~= " />"; 3449 return s; 3450 } 3451 3452 s ~= ">"; 3453 3454 // for simple `<collection><item>text</item><item>text</item></collection>`, let's 3455 // just keep them on the same line 3456 if(tagName.isInArray(inlineElements) || allAreInlineHtml(children, inlineElements)) { 3457 foreach(child; children) { 3458 s ~= child.toString();//toPrettyString(false, 0, null); 3459 } 3460 } else { 3461 foreach(child; children) { 3462 assert(child !is null); 3463 3464 s ~= child.toPrettyString(insertComments, indentationLevel + 1, indentWith); 3465 } 3466 3467 s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith); 3468 } 3469 3470 s ~= "</"; 3471 s ~= tagName; 3472 s ~= ">"; 3473 3474 return s; 3475 } 3476 3477 /+ 3478 /// Writes out the opening tag only, if applicable. 3479 string writeTagOnly(Appender!string where = appender!string()) const { 3480 +/ 3481 3482 /// This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time. 3483 /// Note: the ordering of attributes in the string is undefined. 3484 /// Returns the string it creates. 3485 string writeToAppender(Appender!string where = appender!string()) const { 3486 assert(tagName !is null); 3487 3488 where.reserve((this.children.length + 1) * 512); 3489 3490 auto start = where.data.length; 3491 3492 where.put("<"); 3493 where.put(tagName); 3494 3495 import std.algorithm : sort; 3496 auto keys = sort(attributes.keys); 3497 foreach(n; keys) { 3498 auto v = attributes[n]; // I am sorting these for convenience with another project. order of AAs is undefined, so I'm allowed to do it.... and it is still undefined, I might change it back later. 
3499 //assert(v !is null); 3500 where.put(" "); 3501 where.put(n); 3502 where.put("=\""); 3503 htmlEntitiesEncode(v, where); 3504 where.put("\""); 3505 } 3506 3507 if(selfClosed){ 3508 where.put(" />"); 3509 return where.data[start .. $]; 3510 } 3511 3512 where.put('>'); 3513 3514 innerHTML(where); 3515 3516 where.put("</"); 3517 where.put(tagName); 3518 where.put('>'); 3519 3520 return where.data[start .. $]; 3521 } 3522 3523 /** 3524 Returns a lazy range of all its children, recursively. 3525 */ 3526 @property ElementStream tree() { 3527 return new ElementStream(this); 3528 } 3529 3530 // I moved these from Form because they are generally useful. 3531 // Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here. 3532 /// Tags: HTML, HTML5 3533 // FIXME: add overloads for other label types... 3534 Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 3535 auto fs = this; 3536 auto i = fs.addChild("label"); 3537 3538 if(!(type == "checkbox" || type == "radio")) 3539 i.addChild("span", label); 3540 3541 Element input; 3542 if(type == "textarea") 3543 input = i.addChild("textarea"). 3544 setAttribute("name", name). 3545 setAttribute("rows", "6"); 3546 else 3547 input = i.addChild("input"). 3548 setAttribute("name", name). 3549 setAttribute("type", type); 3550 3551 if(type == "checkbox" || type == "radio") 3552 i.addChild("span", label); 3553 3554 // these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later. 3555 fieldOptions.applyToElement(input); 3556 return i; 3557 } 3558 3559 Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 3560 auto fs = this; 3561 auto i = fs.addChild("label"); 3562 i.addChild(label); 3563 Element input; 3564 if(type == "textarea") 3565 input = i.addChild("textarea"). 
3566 setAttribute("name", name). 3567 setAttribute("rows", "6"); 3568 else 3569 input = i.addChild("input"). 3570 setAttribute("name", name). 3571 setAttribute("type", type); 3572 3573 // these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later. 3574 fieldOptions.applyToElement(input); 3575 return i; 3576 } 3577 3578 Element addField(string label, string name, FormFieldOptions fieldOptions) { 3579 return addField(label, name, "text", fieldOptions); 3580 } 3581 3582 Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) { 3583 auto fs = this; 3584 auto i = fs.addChild("label"); 3585 i.addChild("span", label); 3586 auto sel = i.addChild("select").setAttribute("name", name); 3587 3588 foreach(k, opt; options) 3589 sel.addChild("option", opt, k); 3590 3591 // FIXME: implement requirements somehow 3592 3593 return i; 3594 } 3595 3596 Element addSubmitButton(string label = null) { 3597 auto t = this; 3598 auto holder = t.addChild("div"); 3599 holder.addClass("submit-holder"); 3600 auto i = holder.addChild("input"); 3601 i.type = "submit"; 3602 if(label.length) 3603 i.value = label; 3604 return holder; 3605 } 3606 3607 } 3608 // computedStyle could argubaly be removed to bring size down 3609 //pragma(msg, __traits(classInstanceSize, Element)); 3610 //pragma(msg, Element.tupleof); 3611 3612 // FIXME: since Document loosens the input requirements, it should probably be the sub class... 3613 /// Specializes Document for handling generic XML. 
/// (always uses strict mode, uses xml mime type and file header)
/// Group: core_functionality
class XmlDocument : Document {
	/// Parses `data` with parseStrict after clearing the self-closed and inline element lists
	/// and switching the content type and prolog to their XML values.
	this(string data) {
		selfClosedElements = null;
		inlineElements = null;
		contentType = "text/xml; charset=utf-8";
		_prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n";

		parseStrict(data);
	}
}




import std.string;

/* domconvenience follows { */

/// finds comments that match the given txt. Case insensitive, strips whitespace.
/// Group: core_functionality
Element[] findComments(Document document, string txt) {
	return findComments(document.root, txt);
}

/// ditto
Element[] findComments(Element element, string txt) {
	// normalize the needle once; each candidate gets the same treatment below
	txt = txt.strip().toLower();
	Element[] ret;

	foreach(comment; element.getElementsByTagName("#comment")) {
		string t = comment.nodeValue().strip().toLower();
		if(t == txt)
			ret ~= comment;
	}

	return ret;
}

/// An option type that propagates null. See: [Element.optionSelector]
/// Group: implementations
struct MaybeNullElement(SomeElementType) {
	this(SomeElementType ele) {
		this.element = ele;
	}
	SomeElementType element;

	/// Forwards to the element, with a null check inserted that propagates null.
	///
	/// Methods returning an Element (or subclass) give back a MaybeNullElement wrapping the result,
	/// methods returning string give null, and void methods become no-ops when the wrapped element is null.
	/// Any other return type is rejected at compile time.
	auto opDispatch(string method, T...)(T args) {
		alias type = typeof(__traits(getMember, element, method)(args));
		static if(is(type : Element)) {
			// both return statements must have the same type or `auto` inference
			// fails, so the non-null result is wrapped as well
			if(element is null)
				return MaybeNullElement!type(null);
			return MaybeNullElement!type(__traits(getMember, element, method)(args));
		} else static if(is(type == string)) {
			if(element is null)
				return cast(string) null;
			return __traits(getMember, element, method)(args);
		} else static if(is(type == void)) {
			if(element is null)
				return;
			__traits(getMember, element, method)(args);
		} else {
			static assert(0);
		}
	}

	/// Allows implicit casting to the wrapped element.
	alias element this;
}

/++
	A collection of elements which forwards methods to the children.
+/
/// Group: implementations
struct ElementCollection {
	///
	this(Element e) {
		elements = [e];
	}

	///
	this(Element e, string selector) {
		elements = e.querySelectorAll(selector);
	}

	///
	this(Element[] e) {
		elements = e;
	}

	Element[] elements;
	//alias elements this; // let it implicitly convert to the underlying array

	/// Runs the selector against every element of the collection and gathers all matches in a new collection.
	ElementCollection opIndex(string selector) {
		ElementCollection ec;
		foreach(e; elements)
			ec.elements ~= e.getElementsBySelector(selector);
		return ec;
	}

	///
	Element opIndex(int i) {
		return elements[i];
	}

	/// if you slice it, give the underlying array for easy forwarding of the
	/// collection to range expecting algorithms or looping over.
	Element[] opSlice() {
		return elements;
	}

	/// And input range primitives so we can foreach over this
	void popFront() {
		elements = elements[1..$];
	}

	/// ditto
	Element front() {
		return elements[0];
	}

	/// ditto
	bool empty() {
		return !elements.length;
	}

	/++
		Collects strings from the collection, concatenating them together
		Kinda like running reduce and ~= on it.

		The separator is appended after every element, including the last one.

		---
		document["p"].collect!"innerText";
		---
	+/
	string collect(string method)(string separator = "") {
		string text;
		foreach(e; elements) {
			text ~= mixin("e." ~ method);
			text ~= separator;
		}
		return text;
	}

	/// Forward method calls to each individual [Element|element] of the collection
	/// returns this so it can be chained.
	ElementCollection opDispatch(string name, T...)(T t) {
		foreach(e; elements) {
			mixin("e." ~ name)(t);
		}
		return this;
	}

	/++
		Calls [Element.wrapIn] on each member of the collection, but clones the argument `what` for each one.
	+/
	ElementCollection wrapIn(Element what) {
		foreach(e; elements) {
			e.wrapIn(what.cloneNode(false));
		}

		return this;
	}

	/// Concatenates two ElementCollection together.
	ElementCollection opBinary(string op : "~")(ElementCollection rhs) {
		return ElementCollection(this.elements ~ rhs.elements);
	}
}


/// this puts in operators and opDispatch to handle string indexes and properties, forwarding to get and set functions.
/// Group: implementations
mixin template JavascriptStyleDispatch() {
	/// Property-style access: a non-null argument calls set(name, v), otherwise get(name) is returned.
	string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want.
		if(v !is null)
			return set(name, v);
		return get(name);
	}

	///
	string opIndex(string key) const {
		return get(key);
	}

	///
	string opIndexAssign(string value, string field) {
		return set(field, value);
	}

	// FIXME: doesn't seem to work
	// NOTE(review): `key in obj` is lowered to obj.opBinaryRight!"in"(key), not opBinary,
	// and neither DataSet nor AttributeSet below declares a `fields` member - confirm the
	// intended backing store before wiring this up.
	string* opBinary(string op)(string key)  if(op == "in") {
		return key in fields;
	}
}

/// A proxy object to do the Element class' dataset property. See Element.dataset for more info.
///
/// Do not create this object directly.
/// Group: implementations
struct DataSet {
	///
	this(Element e) {
		this._element = e;
	}

	private Element _element;
	/// Stores value in the `data-` attribute whose suffix is the un-camel-cased name; returns value.
	string set(string name, string value) {
		_element.setAttribute("data-" ~ unCamelCase(name), value);
		return value;
	}

	/// Reads the `data-` attribute whose suffix is the un-camel-cased name.
	string get(string name) const {
		return _element.getAttribute("data-" ~ unCamelCase(name));
	}

	///
	mixin JavascriptStyleDispatch!();
}

/// Proxy object for attributes which will replace the main opDispatch eventually
/// Group: implementations
struct AttributeSet {
	///
	this(Element e) {
		this._element = e;
	}

	private Element _element;
	/// Sets the attribute verbatim (no name translation); returns value.
	string set(string name, string value) {
		_element.setAttribute(name, value);
		return value;
	}

	/// Reads the attribute verbatim (no name translation).
	string get(string name) const {
		return _element.getAttribute(name);
	}

	///
	mixin JavascriptStyleDispatch!();
}



/// for style, i want to be able to set it with a string like a plain attribute,
/// but also be able to do properties Javascript style.
/// Group: implementations
struct ElementStyle {
	this(Element parent) {
		_element = parent;
	}

	Element _element;

	// Gives a reference to the element's "style" attribute string, creating an
	// empty one first when the attribute does not exist yet.
	@property ref inout(string) _attribute() inout {
		auto s = "style" in _element.attributes;
		if(s is null) {
			auto e = cast() _element; // const_cast
			e.attributes["style"] = ""; // we need something to reference
			s = cast(inout) ("style" in e.attributes);
		}

		assert(s !is null);
		return *s;
	}

	alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work.

	// Sets one css property and rewrites the whole style attribute from the parsed rules.
	// `name` may be camelCased ("cssFloat" maps to "float"); an empty name is ignored.
	// A null or empty value removes the property. Returns value.
	string set(string name, string value) {
		if(name.length == 0)
			return value;
		if(name == "cssFloat")
			name = "float";
		else
			name = unCamelCase(name);
		auto r = rules();
		r[name] = value;

		// assemble the new attribute text locally, then store it once
		string rebuilt = "";
		foreach(k, v; r) {
			if(v is null || v.length == 0) /* css can't do empty rules anyway so we'll use that to remove */
				continue;
			if(rebuilt.length)
				rebuilt ~= " ";
			rebuilt ~= k ~ ": " ~ v ~ ";";
		}
		_attribute = rebuilt;

		_element.setAttribute("style", rebuilt); // this is to trigger the observer call

		return value;
	}

	// Looks up one css property (camelCased names accepted, "cssFloat" maps to "float").
	// Returns null when the property is not present in the style attribute.
	string get(string name) const {
		if(name == "cssFloat")
			name = "float";
		else
			name = unCamelCase(name);
		auto r = rules();
		if(auto found = name in r)
			return *found;
		return null;
	}

	// Parses the style attribute into a property => value table. Rules are split on ';'
	// and each one on its first ':'; a rule without ':' is stored with an empty value.
	string[string] rules() const {
		string[string] ret;
		foreach(rule;  _attribute.split(";")) {
			rule = rule.strip();
			if(rule.length == 0)
				continue;
			auto idx = rule.indexOf(":");
			if(idx == -1)
				ret[rule] = "";
			else {
				auto name = rule[0 .. idx].strip();
				auto value = rule[idx + 1 .. $].strip();

				ret[name] = value;
			}
		}

		return ret;
	}

	mixin JavascriptStyleDispatch!();
}

/// Converts a camel cased propertyName to a css style dashed property-name
///
/// Every ASCII capital letter is replaced by a dash followed by its lower case form;
/// all other characters are copied through unchanged.
string unCamelCase(string a) {
	string ret;
	foreach(c; a)
		if((c >= 'A' && c <= 'Z')) {
			ret ~= '-';
			// ASCII-only lowering; avoids building a temporary string per letter
			ret ~= cast(char) (c - 'A' + 'a');
		} else
			ret ~= c;
	return ret;
}

/// Translates a css style property-name to a camel cased propertyName
///
/// Dashes are dropped and the character following a dash is upper cased. The string is
/// walked by code point, so a non-ASCII character after a dash is upper cased as a whole
/// instead of being processed one UTF-8 code unit at a time.
string camelCase(string a) {
	import std.uni : toUpper;

	string ret;
	bool justSawDash = false;
	foreach(dchar c; a) {
		if(c == '-') {
			justSawDash = true;
		} else if(justSawDash) {
			justSawDash = false;
			ret ~= toUpper(c);
		} else {
			ret ~= c;
		}
	}
	return ret;
}









// domconvenience ends }











// @safe:

// NOTE: do *NOT* override toString on Element subclasses. It won't work.
// Instead, override writeToAppender();

// FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too.

// Stripping them is useful for reading php as html.... but adding them
// is good for building php.

// I need to maintain compatibility with the way it is now too.

import std.string;
import std.exception;
import std.uri;
import std.array;
import std.range;

//import std.stdio;

// tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh
// that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's
// most likely a typo so I say kill kill kill.
/++
	This might belong in another module, but it represents a file with a mime type and some data.
	Document implements this interface with type = text/html (see Document.contentType for more info)
	and data = document.toString, so you can return Documents anywhere web.d expects FileResources.
+/
/// Group: bonus_functionality
interface FileResource {
	/// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png"
	@property string contentType() const;
	/// the data
	immutable(ubyte)[] getData() const;
	/++
		filename, return null if none

		History:
			Added December 25, 2020
	+/
	@property string filename() const;
}




///.
/// Group: bonus_functionality
enum NodeType { Text = 3 }


/// You can use this to do an easy null check or a dynamic cast+null check on any element.
///
/// Throws: ElementNotFoundException when `e` is null or is not an instance of `T`.
/// Group: core_functionality
T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element))
	in {}
	out(ret) { assert(ret !is null); }
do {
	// a failed dynamic cast and a null input both show up as null here
	auto ret = cast(T) e;
	if(ret is null)
		throw new ElementNotFoundException(T.stringof, "passed value", e, file, line);
	return ret;
}


///.
/// Group: core_functionality
class DocumentFragment : Element {
	///.
	this(Document _parentDocument) {
		tagName = "#fragment";
		super(_parentDocument);
	}

	/++
		Creates a document fragment from the given HTML. Note that the HTML is assumed to close all tags contained inside it.

		Since: March 29, 2018 (or git tagged v2.1.0)
	+/
	this(Html html) {
		this(null);

		this.innerHTML = html.source;
	}

	/// Writes only the children; the fragment itself produces no tag of its own.
	override string writeToAppender(Appender!string where = appender!string()) const {
		return this.innerHTML(where);
	}

	/// Pretty-prints each child at the given indentation level; the fragment itself adds nothing.
	override string toPrettyString(bool insertComments, int indentationLevel, string indentWith) const {
		string s;
		foreach(child; children)
			s ~= child.toPrettyString(insertComments, indentationLevel, indentWith);
		return s;
	}

	/// DocumentFragments don't really exist in a dom, so they ignore themselves in parent nodes
	/*
	override inout(Element) parentNode() inout {
		return children.length ? children[0].parentNode : null;
	}
	*/
	/+
	override Element parentNode(Element p) {
		this.parentNode = p;
		foreach(child; children)
			child.parentNode = p;
		return p;
	}
	+/
}

/// Given text, encode all html entities on it - &, <, >, and ". This function also
/// encodes all 8 bit characters as entities, thus ensuring the resultant text will work
/// even if your charset isn't set right. You can suppress this by setting encodeNonAscii = false
///
/// The output parameter can be given to append to an existing buffer. You don't have to
/// pass one; regardless, the return value will be usable for you, with just the data encoded.
/// Group: core_functionality
string htmlEntitiesEncode(string data, Appender!string output = appender!string(), bool encodeNonAscii = true) {
	// if there's no entities, we can save a lot of time by not bothering with the
	// decoding loop. This check cuts the net toString time by better than half in my test.
	// let me know if it made your tests worse though, since if you use an entity in just about
	// every location, the check will add time... but I suspect the average experience is like mine
	// since the check gives up as soon as it can anyway.

	bool shortcut = true;
	foreach(char c; data) {
		// non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it.
		if(c == '<' || c == '>' || c == '"' || c == '&' || (encodeNonAscii && cast(uint) c > 127)) {
			shortcut = false; // there's actual work to be done
			break;
		}
	}

	if(shortcut) {
		output.put(data);
		return data;
	}

	// only the part appended by this call is returned, not what the buffer already held
	auto start = output.data.length;

	output.reserve(data.length + 64); // grab some extra space for the encoded entities

	foreach(dchar d; data) {
		if(d == '&')
			output.put("&amp;");
		else if (d == '<')
			output.put("&lt;");
		else if (d == '>')
			output.put("&gt;");
		else if (d == '\"')
			output.put("&quot;");
//		else if (d == '\'')
//			output.put("&#39;"); // if you are in an attribute, it might be important to encode for the same reason as double quotes
			// FIXME: should I encode apostrophes too? as &#39;... I could also do space but if your html is so bad that it doesn't
			// quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh
			// idk about apostrophes though. Might be worth it, might not.
		else if (!encodeNonAscii || (d < 128 && d > 0))
			output.put(d);
		else
			// numeric character reference for everything outside 1 .. 127
			output.put("&#" ~ std.conv.to!string(cast(int) d) ~ ";");
	}

	//assert(output !is null); // this fails on empty attributes.....
	return output.data[start .. $];

//	data = data.replace("\u00a0", " ");
}

/// An alias for htmlEntitiesEncode; it works for xml too
/// Group: core_functionality
string xmlEntitiesEncode(string data) {
	return htmlEntitiesEncode(data);
}

/// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters.
4173 /// Group: core_functionality 4174 dchar parseEntity(in dchar[] entity) { 4175 switch(entity[1..$-1]) { 4176 case "quot": 4177 return '"'; 4178 case "apos": 4179 return '\''; 4180 case "lt": 4181 return '<'; 4182 case "gt": 4183 return '>'; 4184 case "amp": 4185 return '&'; 4186 // the next are html rather than xml 4187 4188 // Retrieved from https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references 4189 // Only entities that resolve to U+0009 ~ U+1D56B are stated. 4190 case "Tab": return '\u0009'; 4191 case "NewLine": return '\u000A'; 4192 case "excl": return '\u0021'; 4193 case "QUOT": return '\u0022'; 4194 case "num": return '\u0023'; 4195 case "dollar": return '\u0024'; 4196 case "percnt": return '\u0025'; 4197 case "AMP": return '\u0026'; 4198 case "lpar": return '\u0028'; 4199 case "rpar": return '\u0029'; 4200 case "ast": case "midast": return '\u002A'; 4201 case "plus": return '\u002B'; 4202 case "comma": return '\u002C'; 4203 case "period": return '\u002E'; 4204 case "sol": return '\u002F'; 4205 case "colon": return '\u003A'; 4206 case "semi": return '\u003B'; 4207 case "LT": return '\u003C'; 4208 case "equals": return '\u003D'; 4209 case "GT": return '\u003E'; 4210 case "quest": return '\u003F'; 4211 case "commat": return '\u0040'; 4212 case "lsqb": case "lbrack": return '\u005B'; 4213 case "bsol": return '\u005C'; 4214 case "rsqb": case "rbrack": return '\u005D'; 4215 case "Hat": return '\u005E'; 4216 case "lowbar": case "UnderBar": return '\u005F'; 4217 case "grave": case "DiacriticalGrave": return '\u0060'; 4218 case "lcub": case "lbrace": return '\u007B'; 4219 case "verbar": case "vert": case "VerticalLine": return '\u007C'; 4220 case "rcub": case "rbrace": return '\u007D'; 4221 case "nbsp": case "NonBreakingSpace": return '\u00A0'; 4222 case "iexcl": return '\u00A1'; 4223 case "cent": return '\u00A2'; 4224 case "pound": return '\u00A3'; 4225 case "curren": return '\u00A4'; 4226 case "yen": return '\u00A5'; 4227 case 
"brvbar": return '\u00A6'; 4228 case "sect": return '\u00A7'; 4229 case "Dot": case "die": case "DoubleDot": case "uml": return '\u00A8'; 4230 case "copy": case "COPY": return '\u00A9'; 4231 case "ordf": return '\u00AA'; 4232 case "laquo": return '\u00AB'; 4233 case "not": return '\u00AC'; 4234 case "shy": return '\u00AD'; 4235 case "reg": case "circledR": case "REG": return '\u00AE'; 4236 case "macr": case "strns": return '\u00AF'; 4237 case "deg": return '\u00B0'; 4238 case "plusmn": case "pm": case "PlusMinus": return '\u00B1'; 4239 case "sup2": return '\u00B2'; 4240 case "sup3": return '\u00B3'; 4241 case "acute": case "DiacriticalAcute": return '\u00B4'; 4242 case "micro": return '\u00B5'; 4243 case "para": return '\u00B6'; 4244 case "middot": case "centerdot": case "CenterDot": return '\u00B7'; 4245 case "cedil": case "Cedilla": return '\u00B8'; 4246 case "sup1": return '\u00B9'; 4247 case "ordm": return '\u00BA'; 4248 case "raquo": return '\u00BB'; 4249 case "frac14": return '\u00BC'; 4250 case "frac12": case "half": return '\u00BD'; 4251 case "frac34": return '\u00BE'; 4252 case "iquest": return '\u00BF'; 4253 case "Agrave": return '\u00C0'; 4254 case "Aacute": return '\u00C1'; 4255 case "Acirc": return '\u00C2'; 4256 case "Atilde": return '\u00C3'; 4257 case "Auml": return '\u00C4'; 4258 case "Aring": case "angst": return '\u00C5'; 4259 case "AElig": return '\u00C6'; 4260 case "Ccedil": return '\u00C7'; 4261 case "Egrave": return '\u00C8'; 4262 case "Eacute": return '\u00C9'; 4263 case "Ecirc": return '\u00CA'; 4264 case "Euml": return '\u00CB'; 4265 case "Igrave": return '\u00CC'; 4266 case "Iacute": return '\u00CD'; 4267 case "Icirc": return '\u00CE'; 4268 case "Iuml": return '\u00CF'; 4269 case "ETH": return '\u00D0'; 4270 case "Ntilde": return '\u00D1'; 4271 case "Ograve": return '\u00D2'; 4272 case "Oacute": return '\u00D3'; 4273 case "Ocirc": return '\u00D4'; 4274 case "Otilde": return '\u00D5'; 4275 case "Ouml": return '\u00D6'; 4276 case "times": 
return '\u00D7'; 4277 case "Oslash": return '\u00D8'; 4278 case "Ugrave": return '\u00D9'; 4279 case "Uacute": return '\u00DA'; 4280 case "Ucirc": return '\u00DB'; 4281 case "Uuml": return '\u00DC'; 4282 case "Yacute": return '\u00DD'; 4283 case "THORN": return '\u00DE'; 4284 case "szlig": return '\u00DF'; 4285 case "agrave": return '\u00E0'; 4286 case "aacute": return '\u00E1'; 4287 case "acirc": return '\u00E2'; 4288 case "atilde": return '\u00E3'; 4289 case "auml": return '\u00E4'; 4290 case "aring": return '\u00E5'; 4291 case "aelig": return '\u00E6'; 4292 case "ccedil": return '\u00E7'; 4293 case "egrave": return '\u00E8'; 4294 case "eacute": return '\u00E9'; 4295 case "ecirc": return '\u00EA'; 4296 case "euml": return '\u00EB'; 4297 case "igrave": return '\u00EC'; 4298 case "iacute": return '\u00ED'; 4299 case "icirc": return '\u00EE'; 4300 case "iuml": return '\u00EF'; 4301 case "eth": return '\u00F0'; 4302 case "ntilde": return '\u00F1'; 4303 case "ograve": return '\u00F2'; 4304 case "oacute": return '\u00F3'; 4305 case "ocirc": return '\u00F4'; 4306 case "otilde": return '\u00F5'; 4307 case "ouml": return '\u00F6'; 4308 case "divide": case "div": return '\u00F7'; 4309 case "oslash": return '\u00F8'; 4310 case "ugrave": return '\u00F9'; 4311 case "uacute": return '\u00FA'; 4312 case "ucirc": return '\u00FB'; 4313 case "uuml": return '\u00FC'; 4314 case "yacute": return '\u00FD'; 4315 case "thorn": return '\u00FE'; 4316 case "yuml": return '\u00FF'; 4317 case "Amacr": return '\u0100'; 4318 case "amacr": return '\u0101'; 4319 case "Abreve": return '\u0102'; 4320 case "abreve": return '\u0103'; 4321 case "Aogon": return '\u0104'; 4322 case "aogon": return '\u0105'; 4323 case "Cacute": return '\u0106'; 4324 case "cacute": return '\u0107'; 4325 case "Ccirc": return '\u0108'; 4326 case "ccirc": return '\u0109'; 4327 case "Cdot": return '\u010A'; 4328 case "cdot": return '\u010B'; 4329 case "Ccaron": return '\u010C'; 4330 case "ccaron": return '\u010D'; 4331 case 
"Dcaron": return '\u010E'; 4332 case "dcaron": return '\u010F'; 4333 case "Dstrok": return '\u0110'; 4334 case "dstrok": return '\u0111'; 4335 case "Emacr": return '\u0112'; 4336 case "emacr": return '\u0113'; 4337 case "Edot": return '\u0116'; 4338 case "edot": return '\u0117'; 4339 case "Eogon": return '\u0118'; 4340 case "eogon": return '\u0119'; 4341 case "Ecaron": return '\u011A'; 4342 case "ecaron": return '\u011B'; 4343 case "Gcirc": return '\u011C'; 4344 case "gcirc": return '\u011D'; 4345 case "Gbreve": return '\u011E'; 4346 case "gbreve": return '\u011F'; 4347 case "Gdot": return '\u0120'; 4348 case "gdot": return '\u0121'; 4349 case "Gcedil": return '\u0122'; 4350 case "Hcirc": return '\u0124'; 4351 case "hcirc": return '\u0125'; 4352 case "Hstrok": return '\u0126'; 4353 case "hstrok": return '\u0127'; 4354 case "Itilde": return '\u0128'; 4355 case "itilde": return '\u0129'; 4356 case "Imacr": return '\u012A'; 4357 case "imacr": return '\u012B'; 4358 case "Iogon": return '\u012E'; 4359 case "iogon": return '\u012F'; 4360 case "Idot": return '\u0130'; 4361 case "imath": case "inodot": return '\u0131'; 4362 case "IJlig": return '\u0132'; 4363 case "ijlig": return '\u0133'; 4364 case "Jcirc": return '\u0134'; 4365 case "jcirc": return '\u0135'; 4366 case "Kcedil": return '\u0136'; 4367 case "kcedil": return '\u0137'; 4368 case "kgreen": return '\u0138'; 4369 case "Lacute": return '\u0139'; 4370 case "lacute": return '\u013A'; 4371 case "Lcedil": return '\u013B'; 4372 case "lcedil": return '\u013C'; 4373 case "Lcaron": return '\u013D'; 4374 case "lcaron": return '\u013E'; 4375 case "Lmidot": return '\u013F'; 4376 case "lmidot": return '\u0140'; 4377 case "Lstrok": return '\u0141'; 4378 case "lstrok": return '\u0142'; 4379 case "Nacute": return '\u0143'; 4380 case "nacute": return '\u0144'; 4381 case "Ncedil": return '\u0145'; 4382 case "ncedil": return '\u0146'; 4383 case "Ncaron": return '\u0147'; 4384 case "ncaron": return '\u0148'; 4385 case "napos": 
return '\u0149'; 4386 case "ENG": return '\u014A'; 4387 case "eng": return '\u014B'; 4388 case "Omacr": return '\u014C'; 4389 case "omacr": return '\u014D'; 4390 case "Odblac": return '\u0150'; 4391 case "odblac": return '\u0151'; 4392 case "OElig": return '\u0152'; 4393 case "oelig": return '\u0153'; 4394 case "Racute": return '\u0154'; 4395 case "racute": return '\u0155'; 4396 case "Rcedil": return '\u0156'; 4397 case "rcedil": return '\u0157'; 4398 case "Rcaron": return '\u0158'; 4399 case "rcaron": return '\u0159'; 4400 case "Sacute": return '\u015A'; 4401 case "sacute": return '\u015B'; 4402 case "Scirc": return '\u015C'; 4403 case "scirc": return '\u015D'; 4404 case "Scedil": return '\u015E'; 4405 case "scedil": return '\u015F'; 4406 case "Scaron": return '\u0160'; 4407 case "scaron": return '\u0161'; 4408 case "Tcedil": return '\u0162'; 4409 case "tcedil": return '\u0163'; 4410 case "Tcaron": return '\u0164'; 4411 case "tcaron": return '\u0165'; 4412 case "Tstrok": return '\u0166'; 4413 case "tstrok": return '\u0167'; 4414 case "Utilde": return '\u0168'; 4415 case "utilde": return '\u0169'; 4416 case "Umacr": return '\u016A'; 4417 case "umacr": return '\u016B'; 4418 case "Ubreve": return '\u016C'; 4419 case "ubreve": return '\u016D'; 4420 case "Uring": return '\u016E'; 4421 case "uring": return '\u016F'; 4422 case "Udblac": return '\u0170'; 4423 case "udblac": return '\u0171'; 4424 case "Uogon": return '\u0172'; 4425 case "uogon": return '\u0173'; 4426 case "Wcirc": return '\u0174'; 4427 case "wcirc": return '\u0175'; 4428 case "Ycirc": return '\u0176'; 4429 case "ycirc": return '\u0177'; 4430 case "Yuml": return '\u0178'; 4431 case "Zacute": return '\u0179'; 4432 case "zacute": return '\u017A'; 4433 case "Zdot": return '\u017B'; 4434 case "zdot": return '\u017C'; 4435 case "Zcaron": return '\u017D'; 4436 case "zcaron": return '\u017E'; 4437 case "fnof": return '\u0192'; 4438 case "imped": return '\u01B5'; 4439 case "gacute": return '\u01F5'; 4440 case 
"jmath": return '\u0237'; 4441 case "circ": return '\u02C6'; 4442 case "caron": case "Hacek": return '\u02C7'; 4443 case "breve": case "Breve": return '\u02D8'; 4444 case "dot": case "DiacriticalDot": return '\u02D9'; 4445 case "ring": return '\u02DA'; 4446 case "ogon": return '\u02DB'; 4447 case "tilde": case "DiacriticalTilde": return '\u02DC'; 4448 case "dblac": case "DiacriticalDoubleAcute": return '\u02DD'; 4449 case "DownBreve": return '\u0311'; 4450 case "Alpha": return '\u0391'; 4451 case "Beta": return '\u0392'; 4452 case "Gamma": return '\u0393'; 4453 case "Delta": return '\u0394'; 4454 case "Epsilon": return '\u0395'; 4455 case "Zeta": return '\u0396'; 4456 case "Eta": return '\u0397'; 4457 case "Theta": return '\u0398'; 4458 case "Iota": return '\u0399'; 4459 case "Kappa": return '\u039A'; 4460 case "Lambda": return '\u039B'; 4461 case "Mu": return '\u039C'; 4462 case "Nu": return '\u039D'; 4463 case "Xi": return '\u039E'; 4464 case "Omicron": return '\u039F'; 4465 case "Pi": return '\u03A0'; 4466 case "Rho": return '\u03A1'; 4467 case "Sigma": return '\u03A3'; 4468 case "Tau": return '\u03A4'; 4469 case "Upsilon": return '\u03A5'; 4470 case "Phi": return '\u03A6'; 4471 case "Chi": return '\u03A7'; 4472 case "Psi": return '\u03A8'; 4473 case "Omega": case "ohm": return '\u03A9'; 4474 case "alpha": return '\u03B1'; 4475 case "beta": return '\u03B2'; 4476 case "gamma": return '\u03B3'; 4477 case "delta": return '\u03B4'; 4478 case "epsi": case "epsilon": return '\u03B5'; 4479 case "zeta": return '\u03B6'; 4480 case "eta": return '\u03B7'; 4481 case "theta": return '\u03B8'; 4482 case "iota": return '\u03B9'; 4483 case "kappa": return '\u03BA'; 4484 case "lambda": return '\u03BB'; 4485 case "mu": return '\u03BC'; 4486 case "nu": return '\u03BD'; 4487 case "xi": return '\u03BE'; 4488 case "omicron": return '\u03BF'; 4489 case "pi": return '\u03C0'; 4490 case "rho": return '\u03C1'; 4491 case "sigmav": case "varsigma": case "sigmaf": return '\u03C2'; 4492 
case "sigma": return '\u03C3'; 4493 case "tau": return '\u03C4'; 4494 case "upsi": case "upsilon": return '\u03C5'; 4495 case "phi": return '\u03C6'; 4496 case "chi": return '\u03C7'; 4497 case "psi": return '\u03C8'; 4498 case "omega": return '\u03C9'; 4499 case "thetav": case "vartheta": case "thetasym": return '\u03D1'; 4500 case "Upsi": case "upsih": return '\u03D2'; 4501 case "straightphi": case "phiv": case "varphi": return '\u03D5'; 4502 case "piv": case "varpi": return '\u03D6'; 4503 case "Gammad": return '\u03DC'; 4504 case "gammad": case "digamma": return '\u03DD'; 4505 case "kappav": case "varkappa": return '\u03F0'; 4506 case "rhov": case "varrho": return '\u03F1'; 4507 case "epsiv": case "varepsilon": case "straightepsilon": return '\u03F5'; 4508 case "bepsi": case "backepsilon": return '\u03F6'; 4509 case "IOcy": return '\u0401'; 4510 case "DJcy": return '\u0402'; 4511 case "GJcy": return '\u0403'; 4512 case "Jukcy": return '\u0404'; 4513 case "DScy": return '\u0405'; 4514 case "Iukcy": return '\u0406'; 4515 case "YIcy": return '\u0407'; 4516 case "Jsercy": return '\u0408'; 4517 case "LJcy": return '\u0409'; 4518 case "NJcy": return '\u040A'; 4519 case "TSHcy": return '\u040B'; 4520 case "KJcy": return '\u040C'; 4521 case "Ubrcy": return '\u040E'; 4522 case "DZcy": return '\u040F'; 4523 case "Acy": return '\u0410'; 4524 case "Bcy": return '\u0411'; 4525 case "Vcy": return '\u0412'; 4526 case "Gcy": return '\u0413'; 4527 case "Dcy": return '\u0414'; 4528 case "IEcy": return '\u0415'; 4529 case "ZHcy": return '\u0416'; 4530 case "Zcy": return '\u0417'; 4531 case "Icy": return '\u0418'; 4532 case "Jcy": return '\u0419'; 4533 case "Kcy": return '\u041A'; 4534 case "Lcy": return '\u041B'; 4535 case "Mcy": return '\u041C'; 4536 case "Ncy": return '\u041D'; 4537 case "Ocy": return '\u041E'; 4538 case "Pcy": return '\u041F'; 4539 case "Rcy": return '\u0420'; 4540 case "Scy": return '\u0421'; 4541 case "Tcy": return '\u0422'; 4542 case "Ucy": return '\u0423'; 
4543 case "Fcy": return '\u0424'; 4544 case "KHcy": return '\u0425'; 4545 case "TScy": return '\u0426'; 4546 case "CHcy": return '\u0427'; 4547 case "SHcy": return '\u0428'; 4548 case "SHCHcy": return '\u0429'; 4549 case "HARDcy": return '\u042A'; 4550 case "Ycy": return '\u042B'; 4551 case "SOFTcy": return '\u042C'; 4552 case "Ecy": return '\u042D'; 4553 case "YUcy": return '\u042E'; 4554 case "YAcy": return '\u042F'; 4555 case "acy": return '\u0430'; 4556 case "bcy": return '\u0431'; 4557 case "vcy": return '\u0432'; 4558 case "gcy": return '\u0433'; 4559 case "dcy": return '\u0434'; 4560 case "iecy": return '\u0435'; 4561 case "zhcy": return '\u0436'; 4562 case "zcy": return '\u0437'; 4563 case "icy": return '\u0438'; 4564 case "jcy": return '\u0439'; 4565 case "kcy": return '\u043A'; 4566 case "lcy": return '\u043B'; 4567 case "mcy": return '\u043C'; 4568 case "ncy": return '\u043D'; 4569 case "ocy": return '\u043E'; 4570 case "pcy": return '\u043F'; 4571 case "rcy": return '\u0440'; 4572 case "scy": return '\u0441'; 4573 case "tcy": return '\u0442'; 4574 case "ucy": return '\u0443'; 4575 case "fcy": return '\u0444'; 4576 case "khcy": return '\u0445'; 4577 case "tscy": return '\u0446'; 4578 case "chcy": return '\u0447'; 4579 case "shcy": return '\u0448'; 4580 case "shchcy": return '\u0449'; 4581 case "hardcy": return '\u044A'; 4582 case "ycy": return '\u044B'; 4583 case "softcy": return '\u044C'; 4584 case "ecy": return '\u044D'; 4585 case "yucy": return '\u044E'; 4586 case "yacy": return '\u044F'; 4587 case "iocy": return '\u0451'; 4588 case "djcy": return '\u0452'; 4589 case "gjcy": return '\u0453'; 4590 case "jukcy": return '\u0454'; 4591 case "dscy": return '\u0455'; 4592 case "iukcy": return '\u0456'; 4593 case "yicy": return '\u0457'; 4594 case "jsercy": return '\u0458'; 4595 case "ljcy": return '\u0459'; 4596 case "njcy": return '\u045A'; 4597 case "tshcy": return '\u045B'; 4598 case "kjcy": return '\u045C'; 4599 case "ubrcy": return '\u045E'; 4600 case 
"dzcy": return '\u045F'; 4601 case "ensp": return '\u2002'; 4602 case "emsp": return '\u2003'; 4603 case "emsp13": return '\u2004'; 4604 case "emsp14": return '\u2005'; 4605 case "numsp": return '\u2007'; 4606 case "puncsp": return '\u2008'; 4607 case "thinsp": case "ThinSpace": return '\u2009'; 4608 case "hairsp": case "VeryThinSpace": return '\u200A'; 4609 case "ZeroWidthSpace": case "NegativeVeryThinSpace": case "NegativeThinSpace": case "NegativeMediumSpace": case "NegativeThickSpace": return '\u200B'; 4610 case "zwnj": return '\u200C'; 4611 case "zwj": return '\u200D'; 4612 case "lrm": return '\u200E'; 4613 case "rlm": return '\u200F'; 4614 case "hyphen": case "dash": return '\u2010'; 4615 case "ndash": return '\u2013'; 4616 case "mdash": return '\u2014'; 4617 case "horbar": return '\u2015'; 4618 case "Verbar": case "Vert": return '\u2016'; 4619 case "lsquo": case "OpenCurlyQuote": return '\u2018'; 4620 case "rsquo": case "rsquor": case "CloseCurlyQuote": return '\u2019'; 4621 case "lsquor": case "sbquo": return '\u201A'; 4622 case "ldquo": case "OpenCurlyDoubleQuote": return '\u201C'; 4623 case "rdquo": case "rdquor": case "CloseCurlyDoubleQuote": return '\u201D'; 4624 case "ldquor": case "bdquo": return '\u201E'; 4625 case "dagger": return '\u2020'; 4626 case "Dagger": case "ddagger": return '\u2021'; 4627 case "bull": case "bullet": return '\u2022'; 4628 case "nldr": return '\u2025'; 4629 case "hellip": case "mldr": return '\u2026'; 4630 case "permil": return '\u2030'; 4631 case "pertenk": return '\u2031'; 4632 case "prime": return '\u2032'; 4633 case "Prime": return '\u2033'; 4634 case "tprime": return '\u2034'; 4635 case "bprime": case "backprime": return '\u2035'; 4636 case "lsaquo": return '\u2039'; 4637 case "rsaquo": return '\u203A'; 4638 case "oline": case "OverBar": return '\u203E'; 4639 case "caret": return '\u2041'; 4640 case "hybull": return '\u2043'; 4641 case "frasl": return '\u2044'; 4642 case "bsemi": return '\u204F'; 4643 case "qprime": 
return '\u2057'; 4644 case "MediumSpace": return '\u205F'; 4645 case "NoBreak": return '\u2060'; 4646 case "ApplyFunction": case "af": return '\u2061'; 4647 case "InvisibleTimes": case "it": return '\u2062'; 4648 case "InvisibleComma": case "ic": return '\u2063'; 4649 case "euro": return '\u20AC'; 4650 case "tdot": case "TripleDot": return '\u20DB'; 4651 case "DotDot": return '\u20DC'; 4652 case "Copf": case "complexes": return '\u2102'; 4653 case "incare": return '\u2105'; 4654 case "gscr": return '\u210A'; 4655 case "hamilt": case "HilbertSpace": case "Hscr": return '\u210B'; 4656 case "Hfr": case "Poincareplane": return '\u210C'; 4657 case "quaternions": case "Hopf": return '\u210D'; 4658 case "planckh": return '\u210E'; 4659 case "planck": case "hbar": case "plankv": case "hslash": return '\u210F'; 4660 case "Iscr": case "imagline": return '\u2110'; 4661 case "image": case "Im": case "imagpart": case "Ifr": return '\u2111'; 4662 case "Lscr": case "lagran": case "Laplacetrf": return '\u2112'; 4663 case "ell": return '\u2113'; 4664 case "Nopf": case "naturals": return '\u2115'; 4665 case "numero": return '\u2116'; 4666 case "copysr": return '\u2117'; 4667 case "weierp": case "wp": return '\u2118'; 4668 case "Popf": case "primes": return '\u2119'; 4669 case "rationals": case "Qopf": return '\u211A'; 4670 case "Rscr": case "realine": return '\u211B'; 4671 case "real": case "Re": case "realpart": case "Rfr": return '\u211C'; 4672 case "reals": case "Ropf": return '\u211D'; 4673 case "rx": return '\u211E'; 4674 case "trade": case "TRADE": return '\u2122'; 4675 case "integers": case "Zopf": return '\u2124'; 4676 case "mho": return '\u2127'; 4677 case "Zfr": case "zeetrf": return '\u2128'; 4678 case "iiota": return '\u2129'; 4679 case "bernou": case "Bernoullis": case "Bscr": return '\u212C'; 4680 case "Cfr": case "Cayleys": return '\u212D'; 4681 case "escr": return '\u212F'; 4682 case "Escr": case "expectation": return '\u2130'; 4683 case "Fscr": case "Fouriertrf": 
return '\u2131'; 4684 case "phmmat": case "Mellintrf": case "Mscr": return '\u2133'; 4685 case "order": case "orderof": case "oscr": return '\u2134'; 4686 case "alefsym": case "aleph": return '\u2135'; 4687 case "beth": return '\u2136'; 4688 case "gimel": return '\u2137'; 4689 case "daleth": return '\u2138'; 4690 case "CapitalDifferentialD": case "DD": return '\u2145'; 4691 case "DifferentialD": case "dd": return '\u2146'; 4692 case "ExponentialE": case "exponentiale": case "ee": return '\u2147'; 4693 case "ImaginaryI": case "ii": return '\u2148'; 4694 case "frac13": return '\u2153'; 4695 case "frac23": return '\u2154'; 4696 case "frac15": return '\u2155'; 4697 case "frac25": return '\u2156'; 4698 case "frac35": return '\u2157'; 4699 case "frac45": return '\u2158'; 4700 case "frac16": return '\u2159'; 4701 case "frac56": return '\u215A'; 4702 case "frac18": return '\u215B'; 4703 case "frac38": return '\u215C'; 4704 case "frac58": return '\u215D'; 4705 case "frac78": return '\u215E'; 4706 case "larr": case "leftarrow": case "LeftArrow": case "slarr": case "ShortLeftArrow": return '\u2190'; 4707 case "uarr": case "uparrow": case "UpArrow": case "ShortUpArrow": return '\u2191'; 4708 case "rarr": case "rightarrow": case "RightArrow": case "srarr": case "ShortRightArrow": return '\u2192'; 4709 case "darr": case "downarrow": case "DownArrow": case "ShortDownArrow": return '\u2193'; 4710 case "harr": case "leftrightarrow": case "LeftRightArrow": return '\u2194'; 4711 case "varr": case "updownarrow": case "UpDownArrow": return '\u2195'; 4712 case "nwarr": case "UpperLeftArrow": case "nwarrow": return '\u2196'; 4713 case "nearr": case "UpperRightArrow": case "nearrow": return '\u2197'; 4714 case "searr": case "searrow": case "LowerRightArrow": return '\u2198'; 4715 case "swarr": case "swarrow": case "LowerLeftArrow": return '\u2199'; 4716 case "nlarr": case "nleftarrow": return '\u219A'; 4717 case "nrarr": case "nrightarrow": return '\u219B'; 4718 case "rarrw": case 
"rightsquigarrow": return '\u219D'; 4719 case "Larr": case "twoheadleftarrow": return '\u219E'; 4720 case "Uarr": return '\u219F'; 4721 case "Rarr": case "twoheadrightarrow": return '\u21A0'; 4722 case "Darr": return '\u21A1'; 4723 case "larrtl": case "leftarrowtail": return '\u21A2'; 4724 case "rarrtl": case "rightarrowtail": return '\u21A3'; 4725 case "LeftTeeArrow": case "mapstoleft": return '\u21A4'; 4726 case "UpTeeArrow": case "mapstoup": return '\u21A5'; 4727 case "map": case "RightTeeArrow": case "mapsto": return '\u21A6'; 4728 case "DownTeeArrow": case "mapstodown": return '\u21A7'; 4729 case "larrhk": case "hookleftarrow": return '\u21A9'; 4730 case "rarrhk": case "hookrightarrow": return '\u21AA'; 4731 case "larrlp": case "looparrowleft": return '\u21AB'; 4732 case "rarrlp": case "looparrowright": return '\u21AC'; 4733 case "harrw": case "leftrightsquigarrow": return '\u21AD'; 4734 case "nharr": case "nleftrightarrow": return '\u21AE'; 4735 case "lsh": case "Lsh": return '\u21B0'; 4736 case "rsh": case "Rsh": return '\u21B1'; 4737 case "ldsh": return '\u21B2'; 4738 case "rdsh": return '\u21B3'; 4739 case "crarr": return '\u21B5'; 4740 case "cularr": case "curvearrowleft": return '\u21B6'; 4741 case "curarr": case "curvearrowright": return '\u21B7'; 4742 case "olarr": case "circlearrowleft": return '\u21BA'; 4743 case "orarr": case "circlearrowright": return '\u21BB'; 4744 case "lharu": case "LeftVector": case "leftharpoonup": return '\u21BC'; 4745 case "lhard": case "leftharpoondown": case "DownLeftVector": return '\u21BD'; 4746 case "uharr": case "upharpoonright": case "RightUpVector": return '\u21BE'; 4747 case "uharl": case "upharpoonleft": case "LeftUpVector": return '\u21BF'; 4748 case "rharu": case "RightVector": case "rightharpoonup": return '\u21C0'; 4749 case "rhard": case "rightharpoondown": case "DownRightVector": return '\u21C1'; 4750 case "dharr": case "RightDownVector": case "downharpoonright": return '\u21C2'; 4751 case "dharl": case 
"LeftDownVector": case "downharpoonleft": return '\u21C3'; 4752 case "rlarr": case "rightleftarrows": case "RightArrowLeftArrow": return '\u21C4'; 4753 case "udarr": case "UpArrowDownArrow": return '\u21C5'; 4754 case "lrarr": case "leftrightarrows": case "LeftArrowRightArrow": return '\u21C6'; 4755 case "llarr": case "leftleftarrows": return '\u21C7'; 4756 case "uuarr": case "upuparrows": return '\u21C8'; 4757 case "rrarr": case "rightrightarrows": return '\u21C9'; 4758 case "ddarr": case "downdownarrows": return '\u21CA'; 4759 case "lrhar": case "ReverseEquilibrium": case "leftrightharpoons": return '\u21CB'; 4760 case "rlhar": case "rightleftharpoons": case "Equilibrium": return '\u21CC'; 4761 case "nlArr": case "nLeftarrow": return '\u21CD'; 4762 case "nhArr": case "nLeftrightarrow": return '\u21CE'; 4763 case "nrArr": case "nRightarrow": return '\u21CF'; 4764 case "lArr": case "Leftarrow": case "DoubleLeftArrow": return '\u21D0'; 4765 case "uArr": case "Uparrow": case "DoubleUpArrow": return '\u21D1'; 4766 case "rArr": case "Rightarrow": case "Implies": case "DoubleRightArrow": return '\u21D2'; 4767 case "dArr": case "Downarrow": case "DoubleDownArrow": return '\u21D3'; 4768 case "hArr": case "Leftrightarrow": case "DoubleLeftRightArrow": case "iff": return '\u21D4'; 4769 case "vArr": case "Updownarrow": case "DoubleUpDownArrow": return '\u21D5'; 4770 case "nwArr": return '\u21D6'; 4771 case "neArr": return '\u21D7'; 4772 case "seArr": return '\u21D8'; 4773 case "swArr": return '\u21D9'; 4774 case "lAarr": case "Lleftarrow": return '\u21DA'; 4775 case "rAarr": case "Rrightarrow": return '\u21DB'; 4776 case "zigrarr": return '\u21DD'; 4777 case "larrb": case "LeftArrowBar": return '\u21E4'; 4778 case "rarrb": case "RightArrowBar": return '\u21E5'; 4779 case "duarr": case "DownArrowUpArrow": return '\u21F5'; 4780 case "loarr": return '\u21FD'; 4781 case "roarr": return '\u21FE'; 4782 case "hoarr": return '\u21FF'; 4783 case "forall": case "ForAll": return 
'\u2200'; 4784 case "comp": case "complement": return '\u2201'; 4785 case "part": case "PartialD": return '\u2202'; 4786 case "exist": case "Exists": return '\u2203'; 4787 case "nexist": case "NotExists": case "nexists": return '\u2204'; 4788 case "empty": case "emptyset": case "emptyv": case "varnothing": return '\u2205'; 4789 case "nabla": case "Del": return '\u2207'; 4790 case "isin": case "isinv": case "Element": case "in": return '\u2208'; 4791 case "notin": case "NotElement": case "notinva": return '\u2209'; 4792 case "niv": case "ReverseElement": case "ni": case "SuchThat": return '\u220B'; 4793 case "notni": case "notniva": case "NotReverseElement": return '\u220C'; 4794 case "prod": case "Product": return '\u220F'; 4795 case "coprod": case "Coproduct": return '\u2210'; 4796 case "sum": case "Sum": return '\u2211'; 4797 case "minus": return '\u2212'; 4798 case "mnplus": case "mp": case "MinusPlus": return '\u2213'; 4799 case "plusdo": case "dotplus": return '\u2214'; 4800 case "setmn": case "setminus": case "Backslash": case "ssetmn": case "smallsetminus": return '\u2216'; 4801 case "lowast": return '\u2217'; 4802 case "compfn": case "SmallCircle": return '\u2218'; 4803 case "radic": case "Sqrt": return '\u221A'; 4804 case "prop": case "propto": case "Proportional": case "vprop": case "varpropto": return '\u221D'; 4805 case "infin": return '\u221E'; 4806 case "angrt": return '\u221F'; 4807 case "ang": case "angle": return '\u2220'; 4808 case "angmsd": case "measuredangle": return '\u2221'; 4809 case "angsph": return '\u2222'; 4810 case "mid": case "VerticalBar": case "smid": case "shortmid": return '\u2223'; 4811 case "nmid": case "NotVerticalBar": case "nsmid": case "nshortmid": return '\u2224'; 4812 case "par": case "parallel": case "DoubleVerticalBar": case "spar": case "shortparallel": return '\u2225'; 4813 case "npar": case "nparallel": case "NotDoubleVerticalBar": case "nspar": case "nshortparallel": return '\u2226'; 4814 case "and": case "wedge": 
return '\u2227'; 4815 case "or": case "vee": return '\u2228'; 4816 case "cap": return '\u2229'; 4817 case "cup": return '\u222A'; 4818 case "int": case "Integral": return '\u222B'; 4819 case "Int": return '\u222C'; 4820 case "tint": case "iiint": return '\u222D'; 4821 case "conint": case "oint": case "ContourIntegral": return '\u222E'; 4822 case "Conint": case "DoubleContourIntegral": return '\u222F'; 4823 case "Cconint": return '\u2230'; 4824 case "cwint": return '\u2231'; 4825 case "cwconint": case "ClockwiseContourIntegral": return '\u2232'; 4826 case "awconint": case "CounterClockwiseContourIntegral": return '\u2233'; 4827 case "there4": case "therefore": case "Therefore": return '\u2234'; 4828 case "becaus": case "because": case "Because": return '\u2235'; 4829 case "ratio": return '\u2236'; 4830 case "Colon": case "Proportion": return '\u2237'; 4831 case "minusd": case "dotminus": return '\u2238'; 4832 case "mDDot": return '\u223A'; 4833 case "homtht": return '\u223B'; 4834 case "sim": case "Tilde": case "thksim": case "thicksim": return '\u223C'; 4835 case "bsim": case "backsim": return '\u223D'; 4836 case "ac": case "mstpos": return '\u223E'; 4837 case "acd": return '\u223F'; 4838 case "wreath": case "VerticalTilde": case "wr": return '\u2240'; 4839 case "nsim": case "NotTilde": return '\u2241'; 4840 case "esim": case "EqualTilde": case "eqsim": return '\u2242'; 4841 case "sime": case "TildeEqual": case "simeq": return '\u2243'; 4842 case "nsime": case "nsimeq": case "NotTildeEqual": return '\u2244'; 4843 case "cong": case "TildeFullEqual": return '\u2245'; 4844 case "simne": return '\u2246'; 4845 case "ncong": case "NotTildeFullEqual": return '\u2247'; 4846 case "asymp": case "ap": case "TildeTilde": case "approx": case "thkap": case "thickapprox": return '\u2248'; 4847 case "nap": case "NotTildeTilde": case "napprox": return '\u2249'; 4848 case "ape": case "approxeq": return '\u224A'; 4849 case "apid": return '\u224B'; 4850 case "bcong": case "backcong": 
return '\u224C'; 4851 case "asympeq": case "CupCap": return '\u224D'; 4852 case "bump": case "HumpDownHump": case "Bumpeq": return '\u224E'; 4853 case "bumpe": case "HumpEqual": case "bumpeq": return '\u224F'; 4854 case "esdot": case "DotEqual": case "doteq": return '\u2250'; 4855 case "eDot": case "doteqdot": return '\u2251'; 4856 case "efDot": case "fallingdotseq": return '\u2252'; 4857 case "erDot": case "risingdotseq": return '\u2253'; 4858 case "colone": case "coloneq": case "Assign": return '\u2254'; 4859 case "ecolon": case "eqcolon": return '\u2255'; 4860 case "ecir": case "eqcirc": return '\u2256'; 4861 case "cire": case "circeq": return '\u2257'; 4862 case "wedgeq": return '\u2259'; 4863 case "veeeq": return '\u225A'; 4864 case "trie": case "triangleq": return '\u225C'; 4865 case "equest": case "questeq": return '\u225F'; 4866 case "ne": case "NotEqual": return '\u2260'; 4867 case "equiv": case "Congruent": return '\u2261'; 4868 case "nequiv": case "NotCongruent": return '\u2262'; 4869 case "le": case "leq": return '\u2264'; 4870 case "ge": case "GreaterEqual": case "geq": return '\u2265'; 4871 case "lE": case "LessFullEqual": case "leqq": return '\u2266'; 4872 case "gE": case "GreaterFullEqual": case "geqq": return '\u2267'; 4873 case "lnE": case "lneqq": return '\u2268'; 4874 case "gnE": case "gneqq": return '\u2269'; 4875 case "Lt": case "NestedLessLess": case "ll": return '\u226A'; 4876 case "Gt": case "NestedGreaterGreater": case "gg": return '\u226B'; 4877 case "twixt": case "between": return '\u226C'; 4878 case "NotCupCap": return '\u226D'; 4879 case "nlt": case "NotLess": case "nless": return '\u226E'; 4880 case "ngt": case "NotGreater": case "ngtr": return '\u226F'; 4881 case "nle": case "NotLessEqual": case "nleq": return '\u2270'; 4882 case "nge": case "NotGreaterEqual": case "ngeq": return '\u2271'; 4883 case "lsim": case "LessTilde": case "lesssim": return '\u2272'; 4884 case "gsim": case "gtrsim": case "GreaterTilde": return '\u2273'; 4885 
case "nlsim": case "NotLessTilde": return '\u2274'; 4886 case "ngsim": case "NotGreaterTilde": return '\u2275'; 4887 case "lg": case "lessgtr": case "LessGreater": return '\u2276'; 4888 case "gl": case "gtrless": case "GreaterLess": return '\u2277'; 4889 case "ntlg": case "NotLessGreater": return '\u2278'; 4890 case "ntgl": case "NotGreaterLess": return '\u2279'; 4891 case "pr": case "Precedes": case "prec": return '\u227A'; 4892 case "sc": case "Succeeds": case "succ": return '\u227B'; 4893 case "prcue": case "PrecedesSlantEqual": case "preccurlyeq": return '\u227C'; 4894 case "sccue": case "SucceedsSlantEqual": case "succcurlyeq": return '\u227D'; 4895 case "prsim": case "precsim": case "PrecedesTilde": return '\u227E'; 4896 case "scsim": case "succsim": case "SucceedsTilde": return '\u227F'; 4897 case "npr": case "nprec": case "NotPrecedes": return '\u2280'; 4898 case "nsc": case "nsucc": case "NotSucceeds": return '\u2281'; 4899 case "sub": case "subset": return '\u2282'; 4900 case "sup": case "supset": case "Superset": return '\u2283'; 4901 case "nsub": return '\u2284'; 4902 case "nsup": return '\u2285'; 4903 case "sube": case "SubsetEqual": case "subseteq": return '\u2286'; 4904 case "supe": case "supseteq": case "SupersetEqual": return '\u2287'; 4905 case "nsube": case "nsubseteq": case "NotSubsetEqual": return '\u2288'; 4906 case "nsupe": case "nsupseteq": case "NotSupersetEqual": return '\u2289'; 4907 case "subne": case "subsetneq": return '\u228A'; 4908 case "supne": case "supsetneq": return '\u228B'; 4909 case "cupdot": return '\u228D'; 4910 case "uplus": case "UnionPlus": return '\u228E'; 4911 case "sqsub": case "SquareSubset": case "sqsubset": return '\u228F'; 4912 case "sqsup": case "SquareSuperset": case "sqsupset": return '\u2290'; 4913 case "sqsube": case "SquareSubsetEqual": case "sqsubseteq": return '\u2291'; 4914 case "sqsupe": case "SquareSupersetEqual": case "sqsupseteq": return '\u2292'; 4915 case "sqcap": case "SquareIntersection": return 
'\u2293'; 4916 case "sqcup": case "SquareUnion": return '\u2294'; 4917 case "oplus": case "CirclePlus": return '\u2295'; 4918 case "ominus": case "CircleMinus": return '\u2296'; 4919 case "otimes": case "CircleTimes": return '\u2297'; 4920 case "osol": return '\u2298'; 4921 case "odot": case "CircleDot": return '\u2299'; 4922 case "ocir": case "circledcirc": return '\u229A'; 4923 case "oast": case "circledast": return '\u229B'; 4924 case "odash": case "circleddash": return '\u229D'; 4925 case "plusb": case "boxplus": return '\u229E'; 4926 case "minusb": case "boxminus": return '\u229F'; 4927 case "timesb": case "boxtimes": return '\u22A0'; 4928 case "sdotb": case "dotsquare": return '\u22A1'; 4929 case "vdash": case "RightTee": return '\u22A2'; 4930 case "dashv": case "LeftTee": return '\u22A3'; 4931 case "top": case "DownTee": return '\u22A4'; 4932 case "bottom": case "bot": case "perp": case "UpTee": return '\u22A5'; 4933 case "models": return '\u22A7'; 4934 case "vDash": case "DoubleRightTee": return '\u22A8'; 4935 case "Vdash": return '\u22A9'; 4936 case "Vvdash": return '\u22AA'; 4937 case "VDash": return '\u22AB'; 4938 case "nvdash": return '\u22AC'; 4939 case "nvDash": return '\u22AD'; 4940 case "nVdash": return '\u22AE'; 4941 case "nVDash": return '\u22AF'; 4942 case "prurel": return '\u22B0'; 4943 case "vltri": case "vartriangleleft": case "LeftTriangle": return '\u22B2'; 4944 case "vrtri": case "vartriangleright": case "RightTriangle": return '\u22B3'; 4945 case "ltrie": case "trianglelefteq": case "LeftTriangleEqual": return '\u22B4'; 4946 case "rtrie": case "trianglerighteq": case "RightTriangleEqual": return '\u22B5'; 4947 case "origof": return '\u22B6'; 4948 case "imof": return '\u22B7'; 4949 case "mumap": case "multimap": return '\u22B8'; 4950 case "hercon": return '\u22B9'; 4951 case "intcal": case "intercal": return '\u22BA'; 4952 case "veebar": return '\u22BB'; 4953 case "barvee": return '\u22BD'; 4954 case "angrtvb": return '\u22BE'; 4955 case 
"lrtri": return '\u22BF'; 4956 case "xwedge": case "Wedge": case "bigwedge": return '\u22C0'; 4957 case "xvee": case "Vee": case "bigvee": return '\u22C1'; 4958 case "xcap": case "Intersection": case "bigcap": return '\u22C2'; 4959 case "xcup": case "Union": case "bigcup": return '\u22C3'; 4960 case "diam": case "diamond": case "Diamond": return '\u22C4'; 4961 case "sdot": return '\u22C5'; 4962 case "sstarf": case "Star": return '\u22C6'; 4963 case "divonx": case "divideontimes": return '\u22C7'; 4964 case "bowtie": return '\u22C8'; 4965 case "ltimes": return '\u22C9'; 4966 case "rtimes": return '\u22CA'; 4967 case "lthree": case "leftthreetimes": return '\u22CB'; 4968 case "rthree": case "rightthreetimes": return '\u22CC'; 4969 case "bsime": case "backsimeq": return '\u22CD'; 4970 case "cuvee": case "curlyvee": return '\u22CE'; 4971 case "cuwed": case "curlywedge": return '\u22CF'; 4972 case "Sub": case "Subset": return '\u22D0'; 4973 case "Sup": case "Supset": return '\u22D1'; 4974 case "Cap": return '\u22D2'; 4975 case "Cup": return '\u22D3'; 4976 case "fork": case "pitchfork": return '\u22D4'; 4977 case "epar": return '\u22D5'; 4978 case "ltdot": case "lessdot": return '\u22D6'; 4979 case "gtdot": case "gtrdot": return '\u22D7'; 4980 case "Ll": return '\u22D8'; 4981 case "Gg": case "ggg": return '\u22D9'; 4982 case "leg": case "LessEqualGreater": case "lesseqgtr": return '\u22DA'; 4983 case "gel": case "gtreqless": case "GreaterEqualLess": return '\u22DB'; 4984 case "cuepr": case "curlyeqprec": return '\u22DE'; 4985 case "cuesc": case "curlyeqsucc": return '\u22DF'; 4986 case "nprcue": case "NotPrecedesSlantEqual": return '\u22E0'; 4987 case "nsccue": case "NotSucceedsSlantEqual": return '\u22E1'; 4988 case "nsqsube": case "NotSquareSubsetEqual": return '\u22E2'; 4989 case "nsqsupe": case "NotSquareSupersetEqual": return '\u22E3'; 4990 case "lnsim": return '\u22E6'; 4991 case "gnsim": return '\u22E7'; 4992 case "prnsim": case "precnsim": return '\u22E8'; 4993 
case "scnsim": case "succnsim": return '\u22E9'; 4994 case "nltri": case "ntriangleleft": case "NotLeftTriangle": return '\u22EA'; 4995 case "nrtri": case "ntriangleright": case "NotRightTriangle": return '\u22EB'; 4996 case "nltrie": case "ntrianglelefteq": case "NotLeftTriangleEqual": return '\u22EC'; 4997 case "nrtrie": case "ntrianglerighteq": case "NotRightTriangleEqual": return '\u22ED'; 4998 case "vellip": return '\u22EE'; 4999 case "ctdot": return '\u22EF'; 5000 case "utdot": return '\u22F0'; 5001 case "dtdot": return '\u22F1'; 5002 case "disin": return '\u22F2'; 5003 case "isinsv": return '\u22F3'; 5004 case "isins": return '\u22F4'; 5005 case "isindot": return '\u22F5'; 5006 case "notinvc": return '\u22F6'; 5007 case "notinvb": return '\u22F7'; 5008 case "isinE": return '\u22F9'; 5009 case "nisd": return '\u22FA'; 5010 case "xnis": return '\u22FB'; 5011 case "nis": return '\u22FC'; 5012 case "notnivc": return '\u22FD'; 5013 case "notnivb": return '\u22FE'; 5014 case "barwed": case "barwedge": return '\u2305'; 5015 case "Barwed": case "doublebarwedge": return '\u2306'; 5016 case "lceil": case "LeftCeiling": return '\u2308'; 5017 case "rceil": case "RightCeiling": return '\u2309'; 5018 case "lfloor": case "LeftFloor": return '\u230A'; 5019 case "rfloor": case "RightFloor": return '\u230B'; 5020 case "drcrop": return '\u230C'; 5021 case "dlcrop": return '\u230D'; 5022 case "urcrop": return '\u230E'; 5023 case "ulcrop": return '\u230F'; 5024 case "bnot": return '\u2310'; 5025 case "profline": return '\u2312'; 5026 case "profsurf": return '\u2313'; 5027 case "telrec": return '\u2315'; 5028 case "target": return '\u2316'; 5029 case "ulcorn": case "ulcorner": return '\u231C'; 5030 case "urcorn": case "urcorner": return '\u231D'; 5031 case "dlcorn": case "llcorner": return '\u231E'; 5032 case "drcorn": case "lrcorner": return '\u231F'; 5033 case "frown": case "sfrown": return '\u2322'; 5034 case "smile": case "ssmile": return '\u2323'; 5035 case "cylcty": return 
'\u232D'; 5036 case "profalar": return '\u232E'; 5037 case "topbot": return '\u2336'; 5038 case "ovbar": return '\u233D'; 5039 case "solbar": return '\u233F'; 5040 case "angzarr": return '\u237C'; 5041 case "lmoust": case "lmoustache": return '\u23B0'; 5042 case "rmoust": case "rmoustache": return '\u23B1'; 5043 case "tbrk": case "OverBracket": return '\u23B4'; 5044 case "bbrk": case "UnderBracket": return '\u23B5'; 5045 case "bbrktbrk": return '\u23B6'; 5046 case "OverParenthesis": return '\u23DC'; 5047 case "UnderParenthesis": return '\u23DD'; 5048 case "OverBrace": return '\u23DE'; 5049 case "UnderBrace": return '\u23DF'; 5050 case "trpezium": return '\u23E2'; 5051 case "elinters": return '\u23E7'; 5052 case "blank": return '\u2423'; 5053 case "oS": case "circledS": return '\u24C8'; 5054 case "boxh": case "HorizontalLine": return '\u2500'; 5055 case "boxv": return '\u2502'; 5056 case "boxdr": return '\u250C'; 5057 case "boxdl": return '\u2510'; 5058 case "boxur": return '\u2514'; 5059 case "boxul": return '\u2518'; 5060 case "boxvr": return '\u251C'; 5061 case "boxvl": return '\u2524'; 5062 case "boxhd": return '\u252C'; 5063 case "boxhu": return '\u2534'; 5064 case "boxvh": return '\u253C'; 5065 case "boxH": return '\u2550'; 5066 case "boxV": return '\u2551'; 5067 case "boxdR": return '\u2552'; 5068 case "boxDr": return '\u2553'; 5069 case "boxDR": return '\u2554'; 5070 case "boxdL": return '\u2555'; 5071 case "boxDl": return '\u2556'; 5072 case "boxDL": return '\u2557'; 5073 case "boxuR": return '\u2558'; 5074 case "boxUr": return '\u2559'; 5075 case "boxUR": return '\u255A'; 5076 case "boxuL": return '\u255B'; 5077 case "boxUl": return '\u255C'; 5078 case "boxUL": return '\u255D'; 5079 case "boxvR": return '\u255E'; 5080 case "boxVr": return '\u255F'; 5081 case "boxVR": return '\u2560'; 5082 case "boxvL": return '\u2561'; 5083 case "boxVl": return '\u2562'; 5084 case "boxVL": return '\u2563'; 5085 case "boxHd": return '\u2564'; 5086 case "boxhD": return 
'\u2565'; 5087 case "boxHD": return '\u2566'; 5088 case "boxHu": return '\u2567'; 5089 case "boxhU": return '\u2568'; 5090 case "boxHU": return '\u2569'; 5091 case "boxvH": return '\u256A'; 5092 case "boxVh": return '\u256B'; 5093 case "boxVH": return '\u256C'; 5094 case "uhblk": return '\u2580'; 5095 case "lhblk": return '\u2584'; 5096 case "block": return '\u2588'; 5097 case "blk14": return '\u2591'; 5098 case "blk12": return '\u2592'; 5099 case "blk34": return '\u2593'; 5100 case "squ": case "square": case "Square": return '\u25A1'; 5101 case "squf": case "squarf": case "blacksquare": case "FilledVerySmallSquare": return '\u25AA'; 5102 case "EmptyVerySmallSquare": return '\u25AB'; 5103 case "rect": return '\u25AD'; 5104 case "marker": return '\u25AE'; 5105 case "fltns": return '\u25B1'; 5106 case "xutri": case "bigtriangleup": return '\u25B3'; 5107 case "utrif": case "blacktriangle": return '\u25B4'; 5108 case "utri": case "triangle": return '\u25B5'; 5109 case "rtrif": case "blacktriangleright": return '\u25B8'; 5110 case "rtri": case "triangleright": return '\u25B9'; 5111 case "xdtri": case "bigtriangledown": return '\u25BD'; 5112 case "dtrif": case "blacktriangledown": return '\u25BE'; 5113 case "dtri": case "triangledown": return '\u25BF'; 5114 case "ltrif": case "blacktriangleleft": return '\u25C2'; 5115 case "ltri": case "triangleleft": return '\u25C3'; 5116 case "loz": case "lozenge": return '\u25CA'; 5117 case "cir": return '\u25CB'; 5118 case "tridot": return '\u25EC'; 5119 case "xcirc": case "bigcirc": return '\u25EF'; 5120 case "ultri": return '\u25F8'; 5121 case "urtri": return '\u25F9'; 5122 case "lltri": return '\u25FA'; 5123 case "EmptySmallSquare": return '\u25FB'; 5124 case "FilledSmallSquare": return '\u25FC'; 5125 case "starf": case "bigstar": return '\u2605'; 5126 case "star": return '\u2606'; 5127 case "phone": return '\u260E'; 5128 case "female": return '\u2640'; 5129 case "male": return '\u2642'; 5130 case "spades": case "spadesuit": 
return '\u2660'; 5131 case "clubs": case "clubsuit": return '\u2663'; 5132 case "hearts": case "heartsuit": return '\u2665'; 5133 case "diams": case "diamondsuit": return '\u2666'; 5134 case "sung": return '\u266A'; 5135 case "flat": return '\u266D'; 5136 case "natur": case "natural": return '\u266E'; 5137 case "sharp": return '\u266F'; 5138 case "check": case "checkmark": return '\u2713'; 5139 case "cross": return '\u2717'; 5140 case "malt": case "maltese": return '\u2720'; 5141 case "sext": return '\u2736'; 5142 case "VerticalSeparator": return '\u2758'; 5143 case "lbbrk": return '\u2772'; 5144 case "rbbrk": return '\u2773'; 5145 case "bsolhsub": return '\u27C8'; 5146 case "suphsol": return '\u27C9'; 5147 case "lobrk": case "LeftDoubleBracket": return '\u27E6'; 5148 case "robrk": case "RightDoubleBracket": return '\u27E7'; 5149 case "lang": case "LeftAngleBracket": case "langle": return '\u27E8'; 5150 case "rang": case "RightAngleBracket": case "rangle": return '\u27E9'; 5151 case "Lang": return '\u27EA'; 5152 case "Rang": return '\u27EB'; 5153 case "loang": return '\u27EC'; 5154 case "roang": return '\u27ED'; 5155 case "xlarr": case "longleftarrow": case "LongLeftArrow": return '\u27F5'; 5156 case "xrarr": case "longrightarrow": case "LongRightArrow": return '\u27F6'; 5157 case "xharr": case "longleftrightarrow": case "LongLeftRightArrow": return '\u27F7'; 5158 case "xlArr": case "Longleftarrow": case "DoubleLongLeftArrow": return '\u27F8'; 5159 case "xrArr": case "Longrightarrow": case "DoubleLongRightArrow": return '\u27F9'; 5160 case "xhArr": case "Longleftrightarrow": case "DoubleLongLeftRightArrow": return '\u27FA'; 5161 case "xmap": case "longmapsto": return '\u27FC'; 5162 case "dzigrarr": return '\u27FF'; 5163 case "nvlArr": return '\u2902'; 5164 case "nvrArr": return '\u2903'; 5165 case "nvHarr": return '\u2904'; 5166 case "Map": return '\u2905'; 5167 case "lbarr": return '\u290C'; 5168 case "rbarr": case "bkarow": return '\u290D'; 5169 case "lBarr": 
return '\u290E'; 5170 case "rBarr": case "dbkarow": return '\u290F'; 5171 case "RBarr": case "drbkarow": return '\u2910'; 5172 case "DDotrahd": return '\u2911'; 5173 case "UpArrowBar": return '\u2912'; 5174 case "DownArrowBar": return '\u2913'; 5175 case "Rarrtl": return '\u2916'; 5176 case "latail": return '\u2919'; 5177 case "ratail": return '\u291A'; 5178 case "lAtail": return '\u291B'; 5179 case "rAtail": return '\u291C'; 5180 case "larrfs": return '\u291D'; 5181 case "rarrfs": return '\u291E'; 5182 case "larrbfs": return '\u291F'; 5183 case "rarrbfs": return '\u2920'; 5184 case "nwarhk": return '\u2923'; 5185 case "nearhk": return '\u2924'; 5186 case "searhk": case "hksearow": return '\u2925'; 5187 case "swarhk": case "hkswarow": return '\u2926'; 5188 case "nwnear": return '\u2927'; 5189 case "nesear": case "toea": return '\u2928'; 5190 case "seswar": case "tosa": return '\u2929'; 5191 case "swnwar": return '\u292A'; 5192 case "rarrc": return '\u2933'; 5193 case "cudarrr": return '\u2935'; 5194 case "ldca": return '\u2936'; 5195 case "rdca": return '\u2937'; 5196 case "cudarrl": return '\u2938'; 5197 case "larrpl": return '\u2939'; 5198 case "curarrm": return '\u293C'; 5199 case "cularrp": return '\u293D'; 5200 case "rarrpl": return '\u2945'; 5201 case "harrcir": return '\u2948'; 5202 case "Uarrocir": return '\u2949'; 5203 case "lurdshar": return '\u294A'; 5204 case "ldrushar": return '\u294B'; 5205 case "LeftRightVector": return '\u294E'; 5206 case "RightUpDownVector": return '\u294F'; 5207 case "DownLeftRightVector": return '\u2950'; 5208 case "LeftUpDownVector": return '\u2951'; 5209 case "LeftVectorBar": return '\u2952'; 5210 case "RightVectorBar": return '\u2953'; 5211 case "RightUpVectorBar": return '\u2954'; 5212 case "RightDownVectorBar": return '\u2955'; 5213 case "DownLeftVectorBar": return '\u2956'; 5214 case "DownRightVectorBar": return '\u2957'; 5215 case "LeftUpVectorBar": return '\u2958'; 5216 case "LeftDownVectorBar": return '\u2959'; 5217 case 
"LeftTeeVector": return '\u295A'; 5218 case "RightTeeVector": return '\u295B'; 5219 case "RightUpTeeVector": return '\u295C'; 5220 case "RightDownTeeVector": return '\u295D'; 5221 case "DownLeftTeeVector": return '\u295E'; 5222 case "DownRightTeeVector": return '\u295F'; 5223 case "LeftUpTeeVector": return '\u2960'; 5224 case "LeftDownTeeVector": return '\u2961'; 5225 case "lHar": return '\u2962'; 5226 case "uHar": return '\u2963'; 5227 case "rHar": return '\u2964'; 5228 case "dHar": return '\u2965'; 5229 case "luruhar": return '\u2966'; 5230 case "ldrdhar": return '\u2967'; 5231 case "ruluhar": return '\u2968'; 5232 case "rdldhar": return '\u2969'; 5233 case "lharul": return '\u296A'; 5234 case "llhard": return '\u296B'; 5235 case "rharul": return '\u296C'; 5236 case "lrhard": return '\u296D'; 5237 case "udhar": case "UpEquilibrium": return '\u296E'; 5238 case "duhar": case "ReverseUpEquilibrium": return '\u296F'; 5239 case "RoundImplies": return '\u2970'; 5240 case "erarr": return '\u2971'; 5241 case "simrarr": return '\u2972'; 5242 case "larrsim": return '\u2973'; 5243 case "rarrsim": return '\u2974'; 5244 case "rarrap": return '\u2975'; 5245 case "ltlarr": return '\u2976'; 5246 case "gtrarr": return '\u2978'; 5247 case "subrarr": return '\u2979'; 5248 case "suplarr": return '\u297B'; 5249 case "lfisht": return '\u297C'; 5250 case "rfisht": return '\u297D'; 5251 case "ufisht": return '\u297E'; 5252 case "dfisht": return '\u297F'; 5253 case "lopar": return '\u2985'; 5254 case "ropar": return '\u2986'; 5255 case "lbrke": return '\u298B'; 5256 case "rbrke": return '\u298C'; 5257 case "lbrkslu": return '\u298D'; 5258 case "rbrksld": return '\u298E'; 5259 case "lbrksld": return '\u298F'; 5260 case "rbrkslu": return '\u2990'; 5261 case "langd": return '\u2991'; 5262 case "rangd": return '\u2992'; 5263 case "lparlt": return '\u2993'; 5264 case "rpargt": return '\u2994'; 5265 case "gtlPar": return '\u2995'; 5266 case "ltrPar": return '\u2996'; 5267 case "vzigzag": 
return '\u299A'; 5268 case "vangrt": return '\u299C'; 5269 case "angrtvbd": return '\u299D'; 5270 case "ange": return '\u29A4'; 5271 case "range": return '\u29A5'; 5272 case "dwangle": return '\u29A6'; 5273 case "uwangle": return '\u29A7'; 5274 case "angmsdaa": return '\u29A8'; 5275 case "angmsdab": return '\u29A9'; 5276 case "angmsdac": return '\u29AA'; 5277 case "angmsdad": return '\u29AB'; 5278 case "angmsdae": return '\u29AC'; 5279 case "angmsdaf": return '\u29AD'; 5280 case "angmsdag": return '\u29AE'; 5281 case "angmsdah": return '\u29AF'; 5282 case "bemptyv": return '\u29B0'; 5283 case "demptyv": return '\u29B1'; 5284 case "cemptyv": return '\u29B2'; 5285 case "raemptyv": return '\u29B3'; 5286 case "laemptyv": return '\u29B4'; 5287 case "ohbar": return '\u29B5'; 5288 case "omid": return '\u29B6'; 5289 case "opar": return '\u29B7'; 5290 case "operp": return '\u29B9'; 5291 case "olcross": return '\u29BB'; 5292 case "odsold": return '\u29BC'; 5293 case "olcir": return '\u29BE'; 5294 case "ofcir": return '\u29BF'; 5295 case "olt": return '\u29C0'; 5296 case "ogt": return '\u29C1'; 5297 case "cirscir": return '\u29C2'; 5298 case "cirE": return '\u29C3'; 5299 case "solb": return '\u29C4'; 5300 case "bsolb": return '\u29C5'; 5301 case "boxbox": return '\u29C9'; 5302 case "trisb": return '\u29CD'; 5303 case "rtriltri": return '\u29CE'; 5304 case "LeftTriangleBar": return '\u29CF'; 5305 case "RightTriangleBar": return '\u29D0'; 5306 case "iinfin": return '\u29DC'; 5307 case "infintie": return '\u29DD'; 5308 case "nvinfin": return '\u29DE'; 5309 case "eparsl": return '\u29E3'; 5310 case "smeparsl": return '\u29E4'; 5311 case "eqvparsl": return '\u29E5'; 5312 case "lozf": case "blacklozenge": return '\u29EB'; 5313 case "RuleDelayed": return '\u29F4'; 5314 case "dsol": return '\u29F6'; 5315 case "xodot": case "bigodot": return '\u2A00'; 5316 case "xoplus": case "bigoplus": return '\u2A01'; 5317 case "xotime": case "bigotimes": return '\u2A02'; 5318 case "xuplus": case 
"biguplus": return '\u2A04'; 5319 case "xsqcup": case "bigsqcup": return '\u2A06'; 5320 case "qint": case "iiiint": return '\u2A0C'; 5321 case "fpartint": return '\u2A0D'; 5322 case "cirfnint": return '\u2A10'; 5323 case "awint": return '\u2A11'; 5324 case "rppolint": return '\u2A12'; 5325 case "scpolint": return '\u2A13'; 5326 case "npolint": return '\u2A14'; 5327 case "pointint": return '\u2A15'; 5328 case "quatint": return '\u2A16'; 5329 case "intlarhk": return '\u2A17'; 5330 case "pluscir": return '\u2A22'; 5331 case "plusacir": return '\u2A23'; 5332 case "simplus": return '\u2A24'; 5333 case "plusdu": return '\u2A25'; 5334 case "plussim": return '\u2A26'; 5335 case "plustwo": return '\u2A27'; 5336 case "mcomma": return '\u2A29'; 5337 case "minusdu": return '\u2A2A'; 5338 case "loplus": return '\u2A2D'; 5339 case "roplus": return '\u2A2E'; 5340 case "Cross": return '\u2A2F'; 5341 case "timesd": return '\u2A30'; 5342 case "timesbar": return '\u2A31'; 5343 case "smashp": return '\u2A33'; 5344 case "lotimes": return '\u2A34'; 5345 case "rotimes": return '\u2A35'; 5346 case "otimesas": return '\u2A36'; 5347 case "Otimes": return '\u2A37'; 5348 case "odiv": return '\u2A38'; 5349 case "triplus": return '\u2A39'; 5350 case "triminus": return '\u2A3A'; 5351 case "tritime": return '\u2A3B'; 5352 case "iprod": case "intprod": return '\u2A3C'; 5353 case "amalg": return '\u2A3F'; 5354 case "capdot": return '\u2A40'; 5355 case "ncup": return '\u2A42'; 5356 case "ncap": return '\u2A43'; 5357 case "capand": return '\u2A44'; 5358 case "cupor": return '\u2A45'; 5359 case "cupcap": return '\u2A46'; 5360 case "capcup": return '\u2A47'; 5361 case "cupbrcap": return '\u2A48'; 5362 case "capbrcup": return '\u2A49'; 5363 case "cupcup": return '\u2A4A'; 5364 case "capcap": return '\u2A4B'; 5365 case "ccups": return '\u2A4C'; 5366 case "ccaps": return '\u2A4D'; 5367 case "ccupssm": return '\u2A50'; 5368 case "And": return '\u2A53'; 5369 case "Or": return '\u2A54'; 5370 case "andand": 
return '\u2A55'; 5371 case "oror": return '\u2A56'; 5372 case "orslope": return '\u2A57'; 5373 case "andslope": return '\u2A58'; 5374 case "andv": return '\u2A5A'; 5375 case "orv": return '\u2A5B'; 5376 case "andd": return '\u2A5C'; 5377 case "ord": return '\u2A5D'; 5378 case "wedbar": return '\u2A5F'; 5379 case "sdote": return '\u2A66'; 5380 case "simdot": return '\u2A6A'; 5381 case "congdot": return '\u2A6D'; 5382 case "easter": return '\u2A6E'; 5383 case "apacir": return '\u2A6F'; 5384 case "apE": return '\u2A70'; 5385 case "eplus": return '\u2A71'; 5386 case "pluse": return '\u2A72'; 5387 case "Esim": return '\u2A73'; 5388 case "Colone": return '\u2A74'; 5389 case "Equal": return '\u2A75'; 5390 case "eDDot": case "ddotseq": return '\u2A77'; 5391 case "equivDD": return '\u2A78'; 5392 case "ltcir": return '\u2A79'; 5393 case "gtcir": return '\u2A7A'; 5394 case "ltquest": return '\u2A7B'; 5395 case "gtquest": return '\u2A7C'; 5396 case "les": case "LessSlantEqual": case "leqslant": return '\u2A7D'; 5397 case "ges": case "GreaterSlantEqual": case "geqslant": return '\u2A7E'; 5398 case "lesdot": return '\u2A7F'; 5399 case "gesdot": return '\u2A80'; 5400 case "lesdoto": return '\u2A81'; 5401 case "gesdoto": return '\u2A82'; 5402 case "lesdotor": return '\u2A83'; 5403 case "gesdotol": return '\u2A84'; 5404 case "lap": case "lessapprox": return '\u2A85'; 5405 case "gap": case "gtrapprox": return '\u2A86'; 5406 case "lne": case "lneq": return '\u2A87'; 5407 case "gne": case "gneq": return '\u2A88'; 5408 case "lnap": case "lnapprox": return '\u2A89'; 5409 case "gnap": case "gnapprox": return '\u2A8A'; 5410 case "lEg": case "lesseqqgtr": return '\u2A8B'; 5411 case "gEl": case "gtreqqless": return '\u2A8C'; 5412 case "lsime": return '\u2A8D'; 5413 case "gsime": return '\u2A8E'; 5414 case "lsimg": return '\u2A8F'; 5415 case "gsiml": return '\u2A90'; 5416 case "lgE": return '\u2A91'; 5417 case "glE": return '\u2A92'; 5418 case "lesges": return '\u2A93'; 5419 case "gesles": 
return '\u2A94'; 5420 case "els": case "eqslantless": return '\u2A95'; 5421 case "egs": case "eqslantgtr": return '\u2A96'; 5422 case "elsdot": return '\u2A97'; 5423 case "egsdot": return '\u2A98'; 5424 case "el": return '\u2A99'; 5425 case "eg": return '\u2A9A'; 5426 case "siml": return '\u2A9D'; 5427 case "simg": return '\u2A9E'; 5428 case "simlE": return '\u2A9F'; 5429 case "simgE": return '\u2AA0'; 5430 case "LessLess": return '\u2AA1'; 5431 case "GreaterGreater": return '\u2AA2'; 5432 case "glj": return '\u2AA4'; 5433 case "gla": return '\u2AA5'; 5434 case "ltcc": return '\u2AA6'; 5435 case "gtcc": return '\u2AA7'; 5436 case "lescc": return '\u2AA8'; 5437 case "gescc": return '\u2AA9'; 5438 case "smt": return '\u2AAA'; 5439 case "lat": return '\u2AAB'; 5440 case "smte": return '\u2AAC'; 5441 case "late": return '\u2AAD'; 5442 case "bumpE": return '\u2AAE'; 5443 case "pre": case "preceq": case "PrecedesEqual": return '\u2AAF'; 5444 case "sce": case "succeq": case "SucceedsEqual": return '\u2AB0'; 5445 case "prE": return '\u2AB3'; 5446 case "scE": return '\u2AB4'; 5447 case "prnE": case "precneqq": return '\u2AB5'; 5448 case "scnE": case "succneqq": return '\u2AB6'; 5449 case "prap": case "precapprox": return '\u2AB7'; 5450 case "scap": case "succapprox": return '\u2AB8'; 5451 case "prnap": case "precnapprox": return '\u2AB9'; 5452 case "scnap": case "succnapprox": return '\u2ABA'; 5453 case "Pr": return '\u2ABB'; 5454 case "Sc": return '\u2ABC'; 5455 case "subdot": return '\u2ABD'; 5456 case "supdot": return '\u2ABE'; 5457 case "subplus": return '\u2ABF'; 5458 case "supplus": return '\u2AC0'; 5459 case "submult": return '\u2AC1'; 5460 case "supmult": return '\u2AC2'; 5461 case "subedot": return '\u2AC3'; 5462 case "supedot": return '\u2AC4'; 5463 case "subE": case "subseteqq": return '\u2AC5'; 5464 case "supE": case "supseteqq": return '\u2AC6'; 5465 case "subsim": return '\u2AC7'; 5466 case "supsim": return '\u2AC8'; 5467 case "subnE": case "subsetneqq": 
return '\u2ACB'; 5468 case "supnE": case "supsetneqq": return '\u2ACC'; 5469 case "csub": return '\u2ACF'; 5470 case "csup": return '\u2AD0'; 5471 case "csube": return '\u2AD1'; 5472 case "csupe": return '\u2AD2'; 5473 case "subsup": return '\u2AD3'; 5474 case "supsub": return '\u2AD4'; 5475 case "subsub": return '\u2AD5'; 5476 case "supsup": return '\u2AD6'; 5477 case "suphsub": return '\u2AD7'; 5478 case "supdsub": return '\u2AD8'; 5479 case "forkv": return '\u2AD9'; 5480 case "topfork": return '\u2ADA'; 5481 case "mlcp": return '\u2ADB'; 5482 case "Dashv": case "DoubleLeftTee": return '\u2AE4'; 5483 case "Vdashl": return '\u2AE6'; 5484 case "Barv": return '\u2AE7'; 5485 case "vBar": return '\u2AE8'; 5486 case "vBarv": return '\u2AE9'; 5487 case "Vbar": return '\u2AEB'; 5488 case "Not": return '\u2AEC'; 5489 case "bNot": return '\u2AED'; 5490 case "rnmid": return '\u2AEE'; 5491 case "cirmid": return '\u2AEF'; 5492 case "midcir": return '\u2AF0'; 5493 case "topcir": return '\u2AF1'; 5494 case "nhpar": return '\u2AF2'; 5495 case "parsim": return '\u2AF3'; 5496 case "parsl": return '\u2AFD'; 5497 case "fflig": return '\uFB00'; 5498 case "filig": return '\uFB01'; 5499 case "fllig": return '\uFB02'; 5500 case "ffilig": return '\uFB03'; 5501 case "ffllig": return '\uFB04'; 5502 case "Ascr": return '\U0001D49C'; 5503 case "Cscr": return '\U0001D49E'; 5504 case "Dscr": return '\U0001D49F'; 5505 case "Gscr": return '\U0001D4A2'; 5506 case "Jscr": return '\U0001D4A5'; 5507 case "Kscr": return '\U0001D4A6'; 5508 case "Nscr": return '\U0001D4A9'; 5509 case "Oscr": return '\U0001D4AA'; 5510 case "Pscr": return '\U0001D4AB'; 5511 case "Qscr": return '\U0001D4AC'; 5512 case "Sscr": return '\U0001D4AE'; 5513 case "Tscr": return '\U0001D4AF'; 5514 case "Uscr": return '\U0001D4B0'; 5515 case "Vscr": return '\U0001D4B1'; 5516 case "Wscr": return '\U0001D4B2'; 5517 case "Xscr": return '\U0001D4B3'; 5518 case "Yscr": return '\U0001D4B4'; 5519 case "Zscr": return '\U0001D4B5'; 5520 
case "ascr": return '\U0001D4B6'; 5521 case "bscr": return '\U0001D4B7'; 5522 case "cscr": return '\U0001D4B8'; 5523 case "dscr": return '\U0001D4B9'; 5524 case "fscr": return '\U0001D4BB'; 5525 case "hscr": return '\U0001D4BD'; 5526 case "iscr": return '\U0001D4BE'; 5527 case "jscr": return '\U0001D4BF'; 5528 case "kscr": return '\U0001D4C0'; 5529 case "lscr": return '\U0001D4C1'; 5530 case "mscr": return '\U0001D4C2'; 5531 case "nscr": return '\U0001D4C3'; 5532 case "pscr": return '\U0001D4C5'; 5533 case "qscr": return '\U0001D4C6'; 5534 case "rscr": return '\U0001D4C7'; 5535 case "sscr": return '\U0001D4C8'; 5536 case "tscr": return '\U0001D4C9'; 5537 case "uscr": return '\U0001D4CA'; 5538 case "vscr": return '\U0001D4CB'; 5539 case "wscr": return '\U0001D4CC'; 5540 case "xscr": return '\U0001D4CD'; 5541 case "yscr": return '\U0001D4CE'; 5542 case "zscr": return '\U0001D4CF'; 5543 case "Afr": return '\U0001D504'; 5544 case "Bfr": return '\U0001D505'; 5545 case "Dfr": return '\U0001D507'; 5546 case "Efr": return '\U0001D508'; 5547 case "Ffr": return '\U0001D509'; 5548 case "Gfr": return '\U0001D50A'; 5549 case "Jfr": return '\U0001D50D'; 5550 case "Kfr": return '\U0001D50E'; 5551 case "Lfr": return '\U0001D50F'; 5552 case "Mfr": return '\U0001D510'; 5553 case "Nfr": return '\U0001D511'; 5554 case "Ofr": return '\U0001D512'; 5555 case "Pfr": return '\U0001D513'; 5556 case "Qfr": return '\U0001D514'; 5557 case "Sfr": return '\U0001D516'; 5558 case "Tfr": return '\U0001D517'; 5559 case "Ufr": return '\U0001D518'; 5560 case "Vfr": return '\U0001D519'; 5561 case "Wfr": return '\U0001D51A'; 5562 case "Xfr": return '\U0001D51B'; 5563 case "Yfr": return '\U0001D51C'; 5564 case "afr": return '\U0001D51E'; 5565 case "bfr": return '\U0001D51F'; 5566 case "cfr": return '\U0001D520'; 5567 case "dfr": return '\U0001D521'; 5568 case "efr": return '\U0001D522'; 5569 case "ffr": return '\U0001D523'; 5570 case "gfr": return '\U0001D524'; 5571 case "hfr": return '\U0001D525'; 5572 
case "ifr": return '\U0001D526'; 5573 case "jfr": return '\U0001D527'; 5574 case "kfr": return '\U0001D528'; 5575 case "lfr": return '\U0001D529'; 5576 case "mfr": return '\U0001D52A'; 5577 case "nfr": return '\U0001D52B'; 5578 case "ofr": return '\U0001D52C'; 5579 case "pfr": return '\U0001D52D'; 5580 case "qfr": return '\U0001D52E'; 5581 case "rfr": return '\U0001D52F'; 5582 case "sfr": return '\U0001D530'; 5583 case "tfr": return '\U0001D531'; 5584 case "ufr": return '\U0001D532'; 5585 case "vfr": return '\U0001D533'; 5586 case "wfr": return '\U0001D534'; 5587 case "xfr": return '\U0001D535'; 5588 case "yfr": return '\U0001D536'; 5589 case "zfr": return '\U0001D537'; 5590 case "Aopf": return '\U0001D538'; 5591 case "Bopf": return '\U0001D539'; 5592 case "Dopf": return '\U0001D53B'; 5593 case "Eopf": return '\U0001D53C'; 5594 case "Fopf": return '\U0001D53D'; 5595 case "Gopf": return '\U0001D53E'; 5596 case "Iopf": return '\U0001D540'; 5597 case "Jopf": return '\U0001D541'; 5598 case "Kopf": return '\U0001D542'; 5599 case "Lopf": return '\U0001D543'; 5600 case "Mopf": return '\U0001D544'; 5601 case "Oopf": return '\U0001D546'; 5602 case "Sopf": return '\U0001D54A'; 5603 case "Topf": return '\U0001D54B'; 5604 case "Uopf": return '\U0001D54C'; 5605 case "Vopf": return '\U0001D54D'; 5606 case "Wopf": return '\U0001D54E'; 5607 case "Xopf": return '\U0001D54F'; 5608 case "Yopf": return '\U0001D550'; 5609 case "aopf": return '\U0001D552'; 5610 case "bopf": return '\U0001D553'; 5611 case "copf": return '\U0001D554'; 5612 case "dopf": return '\U0001D555'; 5613 case "eopf": return '\U0001D556'; 5614 case "fopf": return '\U0001D557'; 5615 case "gopf": return '\U0001D558'; 5616 case "hopf": return '\U0001D559'; 5617 case "iopf": return '\U0001D55A'; 5618 case "jopf": return '\U0001D55B'; 5619 case "kopf": return '\U0001D55C'; 5620 case "lopf": return '\U0001D55D'; 5621 case "mopf": return '\U0001D55E'; 5622 case "nopf": return '\U0001D55F'; 5623 case "oopf": return 
'\U0001D560';
		case "popf": return '\U0001D561';
		case "qopf": return '\U0001D562';
		case "ropf": return '\U0001D563';
		case "sopf": return '\U0001D564';
		case "topf": return '\U0001D565';
		case "uopf": return '\U0001D566';
		case "vopf": return '\U0001D567';
		case "wopf": return '\U0001D568';
		case "xopf": return '\U0001D569';
		case "yopf": return '\U0001D56A';
		case "zopf": return '\U0001D56B';

		// and handling numeric entities
		default:
			if(entity[1] == '#') {
				if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) {
					auto hex = entity[3..$-1];

					auto p = intFromHex(to!string(hex).toLower());
					return cast(dchar) p;
				} else {
					auto decimal = entity[2..$-1];

					// dealing with broken html entities
					while(decimal.length && (decimal[0] < '0' || decimal[0] > '9'))
						decimal = decimal[1 .. $];

					if(decimal.length == 0)
						return ' '; // this is really broken html
					// done with dealing with broken stuff

					auto p = std.conv.to!int(decimal);
					return cast(dchar) p;
				}
			} else
				return '\ufffd'; // replacement character diamond thing
	}

	assert(0);
}

import std.utf;
import std.stdio;

/// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string.
/// By default, it uses loose mode - it will try to return a useful string from garbage input too.
/// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input.
///
/// Params:
///     data = text that may contain `&name;`, `&#123;` or `&#x7B;` style references
///     strict = if true, throw on unterminated entities instead of passing the text through
///
/// Returns: the decoded text. If `data` contains no `&` at all, the original slice is
/// returned without copying.
///
/// Throws: `Exception` in strict mode when an entity is unterminated. Also in strict mode,
/// whatever `parseEntity` or `std.utf.encode` throw is propagated. In loose mode those
/// failures are turned into U+FFFD instead.
/// Group: core_functionality
string htmlEntitiesDecode(string data, bool strict = false) {
	// this check makes a *big* difference; about a 50% improvement of parse speed on my test.
	if(data.indexOf("&") == -1) // all html entities begin with &
		return data; // if there are no entities in here, we can return the original slice and save some time

	// A candidate that contains anything other than ASCII letters/digits (numeric
	// references, stray ampersands in URLs, ...) is abandoned at this many characters
	// after the '&', exactly like this function always did.
	enum shortEntityWindow = 9;

	// Purely alphanumeric candidates may run longer, because parseEntity knows names
	// such as "longleftarrow" or "DoubleLongLeftRightArrow" that would otherwise be
	// unreachable. 32 leaves room for the longest HTML named character reference
	// (31 characters).
	enum maxEntityNameLength = 32;

	char[] a; // this seems to do a *better* job than appender!

	char[4] buffer;

	bool tryingEntity = false;
	// '&' + the name + the character that terminates (or overflows) the attempt
	dchar[maxEntityNameLength + 2] entityBeingTried;
	int entityBeingTriedLength = 0;
	int entityAttemptIndex = 0;
	bool onlyNameCharacters = true; // no non-alphanumeric character seen since the '&'

	// Decodes the entity currently held in entityBeingTried and appends it to the output.
	// Strict mode lets every failure propagate. Loose mode substitutes U+FFFD when
	// parseEntity throws (e.g. std.conv.to!int on "&#12abc;") or yields a value that
	// is not a valid code point (e.g. "&#xD800;"), which std.utf.encode would reject.
	void appendDecodedEntity() {
		dchar decoded;
		if(strict) {
			decoded = parseEntity(entityBeingTried[0 .. entityBeingTriedLength]);
		} else {
			try {
				decoded = parseEntity(entityBeingTried[0 .. entityBeingTriedLength]);
			} catch(Exception) {
				decoded = '\ufffd';
			}
			if(!std.utf.isValidDchar(decoded))
				decoded = '\ufffd';
		}
		a ~= buffer[0 .. std.utf.encode(buffer, decoded)];
	}

	foreach(dchar ch; data) {
		if(tryingEntity) {
			entityAttemptIndex++;
			entityBeingTried[entityBeingTriedLength++] = ch;

			// I saw some crappy html in the wild that looked like &0&#1111; this tries to handle that.
			if(ch == '&') {
				if(strict)
					throw new Exception("unterminated entity; & inside another at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

				// if not strict, let's try to parse both.

				if(entityBeingTried[0 .. entityBeingTriedLength] == "&&")
					a ~= "&"; // double amp means keep the first one, still try to parse the next one
				else if(entityAttemptIndex > shortEntityWindow)
					// a long name that never reached its ';' - keep the text as it was
					// (without the new '&', which starts the next attempt)
					a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength - 1]);
				else
					appendDecodedEntity();

				// tryingEntity is still true; entityBeingTried[0] is still the '&'
				entityBeingTriedLength = 1;
				entityAttemptIndex = 0; // restarting on this
				onlyNameCharacters = true;
			} else
			if(ch == ';') {
				tryingEntity = false;
				appendDecodedEntity();
			} else if(ch == ' ') {
				// e.g. you & i
				if(strict)
					throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
				else {
					tryingEntity = false;
					a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]);
				}
			} else {
				immutable isNameCharacter =
					(ch >= 'a' && ch <= 'z') ||
					(ch >= 'A' && ch <= 'Z') ||
					(ch >= '0' && ch <= '9');
				if(!isNameCharacter)
					onlyNameCharacters = false;

				// give up once the attempt is too long to be an entity
				immutable tooLong = entityAttemptIndex >= shortEntityWindow
					&& (!onlyNameCharacters || entityAttemptIndex > maxEntityNameLength);

				if(tooLong) {
					if(strict)
						throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
					else {
						tryingEntity = false;
						a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]);
					}
				}
			}
		} else {
			if(ch == '&') {
				tryingEntity = true;
				entityBeingTriedLength = 0;
				entityBeingTried[entityBeingTriedLength++] = ch;
				entityAttemptIndex = 0;
				onlyNameCharacters = true;
			} else {
				a ~= buffer[0 .. std.utf.encode(buffer, ch)];
			}
		}
	}

	if(tryingEntity) {
		if(strict)
			throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

		// otherwise, let's try to recover, at least so we don't drop any data
		a ~= to!string(entityBeingTried[0 .. entityBeingTriedLength]);
		// FIXME: what if we have "cool &amp"? should we try to parse it?
	}

	return cast(string) a; // assumeUnique is actually kinda slow, lol
}

/// Base class for nodes that are not ordinary tags. Children cannot be appended to them.
/// Group: implementations
abstract class SpecialElement : Element {
	this(Document _parentDocument) {
		super(_parentDocument);
	}

	/// Always fails: special nodes cannot have children.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a special node");
	}

	/// Always 100 for special nodes.
	@property override int nodeType() const {
		return 100;
	}
}

/// A node whose `source` text is written to the output exactly as given.
/// Group: implementations
class RawSource : SpecialElement {
	/// Creates a raw node holding `s`; its tagName is "#raw".
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#raw";
	}

	/// Returns `this.toString()`.
	override string nodeValue() const {
		return this.toString();
	}

	/// Puts `source` into `where` unmodified and returns `source`.
	override string writeToAppender(Appender!string where = appender!string()) const {
		where.put(source);
		return source;
	}

	// the pretty printing arguments are ignored; the raw text is returned as is
	override string toPrettyString(bool, int, string) const {
		return source;
	}


	// `deep` is ignored; the copy shares the same parentDocument and source
	override RawSource cloneNode(bool deep) {
		return new RawSource(parentDocument, source);
	}

	///.
string source;
}

/// Common base for embedded server side code blocks. The node's tagName is
/// "#" followed by the type given to the constructor.
/// Group: implementations
abstract class ServerSideCode : SpecialElement {
	this(Document _parentDocument, string type) {
		super(_parentDocument);
		tagName = "#" ~ type;
	}

	/// Returns `source`.
	override string nodeValue() const {
		return source;
	}

	/// Appends `source` wrapped in `<` and `>` to `where`, then returns the part of
	/// the appender's data that this call added.
	override string writeToAppender(Appender!string where = appender!string()) const {
		immutable begin = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[begin .. $];
	}

	// the pretty printing arguments are ignored
	override string toPrettyString(bool, int, string) const {
		string result = "<";
		result ~= source;
		result ~= ">";
		return result;
	}

	/// The text placed between the angle brackets.
	string source;
}

/// Server side code node whose tagName is "#php".
/// Group: implementations
class PhpCode : ServerSideCode {
	/// Stores `s` as the node's `source`.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "php");
		this.source = s;
	}

	// `deep` is ignored
	override PhpCode cloneNode(bool deep) {
		auto copy = new PhpCode(parentDocument, source);
		return copy;
	}
}

/// Server side code node whose tagName is "#asp".
/// Group: implementations
class AspCode : ServerSideCode {
	/// Stores `s` as the node's `source`.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "asp");
		this.source = s;
	}

	// `deep` is ignored
	override AspCode cloneNode(bool deep) {
		auto copy = new AspCode(parentDocument, source);
		return copy;
	}
}

/// A node written out as `<!` followed by `source` and `>`. Its tagName is "#bpi".
/// Group: implementations
class BangInstruction : SpecialElement {
	/// Stores `s` as the node's `source`.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		tagName = "#bpi";
		source = s;
	}

	/// Returns `source`.
	override string nodeValue() const {
		return source;
	}

	// `deep` is ignored
	override BangInstruction cloneNode(bool deep) {
		auto copy = new BangInstruction(parentDocument, source);
		return copy;
	}

	/// Appends `<!`, `source` and `>` to `where`, then returns the part of the
	/// appender's data that this call added.
	override string writeToAppender(Appender!string where = appender!string()) const {
		immutable begin = where.data.length;
		where.put("<!");
		where.put(source);
		where.put(">");
		return where.data[begin .. $];
	}

	// the pretty printing arguments are ignored
	override string toPrettyString(bool, int, string) const {
		return "<!" ~ source ~ ">";
	}

	/// The text placed between `<!` and `>`.
	string source;
}

/// A node written out as `<` followed by `source` and `>`. Its tagName is "#qpi".
/// Group: implementations
class QuestionInstruction : SpecialElement {
	/// Stores `s` as the node's `source`.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		tagName = "#qpi";
		source = s;
	}

	// `deep` is ignored
	override QuestionInstruction cloneNode(bool deep) {
		auto copy = new QuestionInstruction(parentDocument, source);
		return copy;
	}

	/// Returns `source`.
	override string nodeValue() const {
		return source;
	}

	/// Appends `<`, `source` and `>` to `where`, then returns the part of the
	/// appender's data that this call added.
	override string writeToAppender(Appender!string where = appender!string()) const {
		immutable begin = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[begin .. $];
	}

	// the pretty printing arguments are ignored
	override string toPrettyString(bool, int, string) const {
		return "<" ~ source ~ ">";
	}


	/// The text placed between the angle brackets.
	string source;
}

///.
/// Group: implementations
class HtmlComment : SpecialElement {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#comment";
	}

	override HtmlComment cloneNode(bool deep) {
		return new HtmlComment(parentDocument, source);
	}

	///.
	override string nodeValue() const {
		return this.source;
	}

	///.
override string writeToAppender(Appender!string where = appender!string()) const {
		auto start = where.data.length;
		where.put("<!--");
		where.put(source);
		where.put("-->");
		return where.data[start .. $];
	}

	override string toPrettyString(bool, int, string) const {
		string s;
		s ~= "<!--";
		s ~= source;
		s ~= "-->";
		return s;
	}


	///.
	string source;
}




/// A run of text in the tree. The text is kept decoded in `contents` and is
/// entity-encoded again when the node is written out. Its tagName is "#text".
/// Group: implementations
class TextNode : Element {
  public:
	/// Creates a text node holding the already decoded text `e`.
	this(Document _parentDocument, string e) {
		super(_parentDocument);
		contents = e;
		tagName = "#text";
	}

	/// Creates a text node with no parent document.
	this(string e) {
		this(null, e);
	}

	string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes

	/// Creates a text node from text that still contains html entities. The entities
	/// are decoded with [htmlEntitiesDecode]; strict decoding is used only when a
	/// document is given and that document is not `loose`.
	static TextNode fromUndecodedString(Document _parentDocument, string html) {
		auto e = new TextNode(_parentDocument, "");
		e.contents = htmlEntitiesDecode(html, _parentDocument is null ? false : !_parentDocument.loose);
		return e;
	}

	/// Returns a new text node with the same parentDocument and contents. `deep` is ignored.
	override @property TextNode cloneNode(bool deep) {
		auto n = new TextNode(parentDocument, contents);
		return n;
	}

	/// Returns `contents`.
	override string nodeValue() const {
		return this.contents; //toString();
	}

	/// Returns `NodeType.Text`.
	@property override int nodeType() const {
		return NodeType.Text;
	}

	/// Writes the entity-encoded contents through `htmlEntitiesEncode(contents, where)`
	/// and returns what that call returned, or "" when there are no contents.
	override string writeToAppender(Appender!string where = appender!string()) const {
		string s;
		if(contents.length)
			s = htmlEntitiesEncode(contents, where);
		else
			s = "";

		assert(s !is null);
		return s;
	}

	/// Returns the entity-encoded text laid out for pretty printing.
	///
	/// Unless the parent is a `pre` element, runs of whitespace are collapsed to a
	/// single space first. The text is stripped unless the parent is a `p` element
	/// (or there is no parent). The first line is prefixed with whatever
	/// `toPrettyStringIndent` returns; every further line is started on a new line
	/// and indented with `indentationLevel` copies of `indentWith`.
	///
	/// Params:
	///     insertComments = wrap the indentation of continuation lines in `<!--` and `-->`
	///     indentationLevel = how many times `indentWith` is repeated per line
	///     indentWith = the string used for one level of indentation
	override string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
		string s;

		string contents = this.contents;
		// we will first collapse the whitespace per html
		// sort of. note this can break stuff yo!!!!
		if(this.parentNode is null || this.parentNode.tagName != "pre") {
			string n = "";
			// when indented, leading whitespace is dropped rather than collapsed
			bool lastWasWhitespace = indentationLevel > 0;
			foreach(char c; contents) {
				if(c.isSimpleWhite) {
					if(!lastWasWhitespace)
						n ~= ' ';
					lastWasWhitespace = true;
				} else {
					n ~= c;
					lastWasWhitespace = false;
				}
			}

			contents = n;
		}

		if(this.parentNode !is null && this.parentNode.tagName != "p") {
			contents = contents.strip;
		}

		auto e = htmlEntitiesEncode(contents);
		import std.algorithm.iteration : splitter;
		bool first = true;
		foreach(line; splitter(e, "\n")) {
			if(first) {
				s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
				first = false;
			} else {
				s ~= "\n";
				if(insertComments)
					s ~= "<!--";
				// honor the caller's indentation string here too, the same one
				// that is handed to toPrettyStringIndent for the first line
				foreach(i; 0 .. indentationLevel)
					s ~= indentWith;
				if(insertComments)
					s ~= "-->";
			}
			s ~= line.stripRight;
		}
		return s;
	}

	/// Always fails: text nodes cannot have children.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a text node");
	}

	/// The decoded text of this node.
	string contents;
	// alias contents content; // I just mistype this a lot,
}

/**
	There are subclasses of Element offering improved helper
	functions for the element in HTML.
*/

///.
/// Group: implementations
class Link : Element {

	///.
this(Document _parentDocument) {
		super(_parentDocument);
		this.tagName = "a";
	}


	/// Creates an `a` element whose `href` attribute is `href` and whose inner text is `text`.
	this(string href, string text) {
		super("a");
		setAttribute("href", href);
		innerText = text;
	}
	/+
	/// Returns everything in the href EXCEPT the query string
	@property string targetSansQuery() {

	}

	///.
	@property string domainName() {

	}

	///.
	@property string path
	+/
	/// This gets a variable from the URL's query string.
	/// Returns: the decoded value, or `null` when the variable (or the href itself) is absent.
	string getValue(string name) {
		auto vars = variablesHash();
		if(auto found = name in vars)
			return *found;
		return null;
	}

	/// Parses the query string of the `href` attribute into a name => value table.
	///
	/// Names and values are URI-decoded. A name without `=` maps to `""`.
	/// Returns `null` when there is no `href` attribute at all.
	private string[string] variablesHash() {
		string href = getAttribute("href");
		if(href is null)
			return null;

		// The fragment starts at the first '#'. Cut it off first so that a '?' inside the
		// fragment ("page#what?now") is not mistaken for the start of a query string.
		auto fragment = href.indexOf("#");
		if(fragment != -1)
			href = href[0..fragment];

		auto ques = href.indexOf("?");
		string str = "";
		if(ques != -1)
			str = href[ques+1..$];

		string[string] hash;

		foreach(var; str.split("&")) {
			// "a=1&&b=2" or a trailing "&" must not create a variable with an empty name
			if(var.length == 0)
				continue;

			auto index = var.indexOf("=");
			if(index == -1)
				hash[decodeComponent(var)] = ""; // decode bare names just like names that carry a value
			else {
				hash[decodeComponent(var[0..index])] = decodeComponent(var[index + 1 .. $]);
			}
		}

		return hash;
	}

	///.
/// Replaces the query string of the `href` attribute with one built from `vars`.
	///
	/// Names and values are URI-encoded; a variable with an empty value is written as
	/// just its name. An existing `#fragment` is kept after the new query.
	/*private*/ void updateQueryString(string[string] vars) {
		string href = getAttribute("href");

		// In a URL the fragment trails the query ("path?query#fragment"), so it has to be
		// split off BEFORE the old query is cut away. Doing it the other way around threw
		// the fragment away together with the old query.
		string frag = "";
		auto fragment = href.indexOf("#");
		if(fragment != -1) {
			frag = href[fragment..$];
			href = href[0..fragment];
		}

		auto question = href.indexOf("?");
		if(question != -1)
			href = href[0..question];

		string query = "?";
		bool first = true;
		foreach(name, value; vars) {
			if(!first)
				query ~= "&";
			else
				first = false;

			query ~= encodeComponent(name);
			if(value.length)
				query ~= "=" ~ encodeComponent(value);
		}

		// no variables at all: leave the bare "?" off
		if(query != "?")
			href ~= query;

		href ~= frag;

		setAttribute("href", href);
	}

	/// Sets or adds the variable with the given name to the given value
	/// It automatically URI encodes the values and takes care of the ? and &.
	override void setValue(string name, string variable) {
		auto vars = variablesHash();
		vars[name] = variable;

		updateQueryString(vars);
	}

	/// Removes the given variable from the query string
	void removeValue(string name) {
		auto vars = variablesHash();
		vars.remove(name);

		updateQueryString(vars);
	}

	/*
	///.
	override string toString() {

	}

	///.
	override string getAttribute(string name) {
		if(name == "href") {

		} else
			return super.getAttribute(name);
	}
	*/
}

///.
/// Group: implementations
class Form : Element {

	///.
this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "form";
	}

	/// Adds a field to the first element matching `fieldset div` if the form has one,
	/// otherwise falls back to the inherited `addField`.
	override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto container = this.querySelector("fieldset div");
		return container is null
			? super.addField(label, name, type, fieldOptions)
			: container.addField(label, name, type, fieldOptions);
	}

	/// ditto, always using the type `"text"`.
	override Element addField(string label, string name, FormFieldOptions fieldOptions) {
		auto container = this.querySelector("fieldset div");
		return container is null
			? super.addField(label, name, "text", fieldOptions)
			: container.addField(label, name, "text", fieldOptions);
	}

	/// ditto, for the overload that takes an `options` table.
	override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto container = this.querySelector("fieldset div");
		return container is null
			? super.addField(label, name, options, fieldOptions)
			: container.addField(label, name, options, fieldOptions);
	}

	/// Sets the field's value, creating a new hidden input if the field does not exist yet.
	override void setValue(string field, string value) {
		setValue(field, value, true);
	}

	// FIXME: doesn't handle arrays; multiple fields can have the same name

	/// Sets the form field's value. For input boxes, this sets the value attribute. For
	/// textareas, it sets the innerText. For radio boxes and select boxes, it removes
	/// the checked/selected attribute from all, and adds it to the one matching the value.
	/// For checkboxes, if the value is non-null and not empty, it checks the box.
	///
	/// If you set a value that doesn't exist, it throws an exception if makeNew is false.
	/// Otherwise, it makes a new input with type=hidden to keep the value.
/// Throws: `Exception` when no element has that name and `makeNew` is false.
	void setValue(string field, string value, bool makeNew) {
		auto matches = getField(field);

		// Nothing carries that name: create a hidden input or report the problem.
		if(matches.length == 0) {
			if(!makeNew)
				throw new Exception("form field does not exist");
			addInput(field, value);
			return;
		}

		// Several elements share the name: assume radio boxes and check only the one
		// whose value attribute equals the requested value.
		if(matches.length != 1) {
			foreach(candidate; matches) {
				if(candidate.getAttribute("value") == value)
					candidate.setAttribute("checked", "checked");
				else
					candidate.removeAttribute("checked");
			}
			return;
		}

		auto target = matches[0];
		switch(target.tagName) {
			default: assert(0);
			case "textarea":
				target.innerText = value;
			break;
			case "input":
				string type = target.getAttribute("type");
				if(type == "checkbox" || type == "radio") {
					// any non-empty value other than "false" turns the box on
					if(value.length && value != "false")
						target.setAttribute("checked", "checked");
					else
						target.removeAttribute("checked");
				} else {
					// no type attribute, or any other type: plain value attribute
					target.value = value;
				}
			break;
			case "select":
				bool matched = false;
				foreach(option; target.tree) {
					if(option.tagName != "option")
						continue;
					// an option without a value attribute is identified by its text
					string optionValue = option.getAttribute("value");
					if(optionValue is null)
						optionValue = option.innerText;
					if(optionValue == value) {
						option.setAttribute("selected", "selected");
						matched = true;
					} else
						option.removeAttribute("selected");
				}

				// unknown value: add a new, selected option for it
				if(!matched)
					target.addChild("option", value).setAttribute("selected", "selected");
			break;
		}
	}

	/// This takes an array of strings and adds hidden <input> elements for each one of them. Unlike setValue,
	/// it makes no attempt to find and modify existing elements in the form to the new values.
/// Each value is added through `addChild("input", key, value)`.
	void addValueArray(string key, string[] arrayOfValues) {
		foreach(arr; arrayOfValues)
			addChild("input", key, arr);
	}

	/// Gets the value of the field; what would be given if it submitted right now. (so
	/// it handles select boxes and radio buttons too). For checkboxes, if a value isn't
	/// given, but it is checked, it returns "checked", since null and "" are indistinguishable
	///
	/// A single radio button is treated like a checkbox: it only contributes a value while
	/// it is checked. A selected `<option>` without a `value` attribute yields its text,
	/// which is the same rule [setValue] uses to find the option.
	///
	/// Returns: the value, or `""` when the field does not exist or nothing is checked/selected.
	string getValue(string field) {
		auto eles = getField(field);
		if(eles.length == 0)
			return "";
		if(eles.length == 1) {
			auto e = eles[0];
			switch(e.tagName) {
				default: assert(0);
				case "input":
					// An unchecked lone radio button submits nothing, exactly like an
					// unchecked checkbox, so it must not report its value attribute.
					if(e.type == "checkbox" || e.type == "radio") {
						if(e.checked)
							return e.value.length ? e.value : "checked";
						return "";
					} else
						return e.value;
				case "textarea":
					return e.innerText;
				case "select":
					foreach(child; e.tree) {
						if(child.tagName != "option")
							continue;
						if(child.selected) {
							// Keep this in sync with setValue, which falls back to the
							// option's text when there is no value attribute.
							string val = child.getAttribute("value");
							return val is null ? child.innerText : val;
						}
					}
				break;
			}
		} else {
			// assuming radio
			foreach(e; eles) {
				if(e.checked)
					return e.value;
			}
		}

		return "";
	}

	// FIXME: doesn't handle multiple elements with the same name (except radio buttons)
	///.
/// Builds `name=value` pairs joined by `&` for every distinct name found by the `[name]`
	/// selector. Names and values are URI-encoded; values come from [getValue].
	string getPostableData() {
		bool[string] emitted;
		string encoded;

		foreach(element; getElementsBySelector("[name]")) {
			string fieldName = element.name;
			if(fieldName in emitted)
				continue;

			// every pair contains "=", so a non-empty result means a pair was already written
			if(encoded.length)
				encoded ~= "&";

			encoded ~= std.uri.encodeComponent(fieldName) ~ "=" ~ std.uri.encodeComponent(getValue(fieldName));

			emitted[fieldName] = true;
		}

		return encoded;
	}

	/// Gets the actual elements with the given name
	Element[] getField(string name) {
		Element[] found;
		foreach(candidate; tree)
			if(candidate.name == name)
				found ~= candidate;
		return found;
	}

	/// Grabs the <label> with the given for tag, if there is one.
	/// Returns: the first matching label in tree order, or `null`.
	Element getLabel(string forId) {
		foreach(candidate; tree) {
			if(candidate.tagName != "label")
				continue;
			if(candidate.getAttribute("for") == forId)
				return candidate;
		}
		return null;
	}

	/// Adds a new INPUT field to the end of the form with the given attributes.
	/// Returns: the element that was appended.
	Element addInput(string name, string value, string type = "hidden") {
		auto input = new Element(parentDocument, "input", null, true);
		input.name = name;
		input.value = value;
		input.type = type;

		appendChild(input);
		return input;
	}

	/// Removes the given field from the form. It finds the element and knocks it right out.
	void removeField(string name) {
		foreach(doomed; getField(name))
			doomed.parentNode.removeChild(doomed);
	}

	/+
	/// Returns all form members.
	@property Element[] elements() {

	}

	///.
string opDispatch(string name)(string v = null)
		// filter things that should actually be attributes on the form
		if( name != "method" && name != "action" && name != "enctype"
		 && name != "style"  && name != "name" && name != "id" && name != "class")
	{

	}
	+/
	/+
	void submit() {
		// take its elements and submit them through http
	}
	+/
}

import std.conv;

/// Helpers for building and inspecting a `table` element.
/// Group: implementations
class Table : Element {

	/// Creates the element and sets its tag name to `table`.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "table";
	}

	/// Creates an element with the given type and content.
	/// The cell is created through the parent document when there is one, otherwise through
	/// `Element.make`. An `Html` argument is assigned to `innerHTML`; anything else is
	/// converted with `to!string` and assigned to `innerText`. The cell is not attached.
	Element th(T)(T t) {
		Element e;
		if(parentDocument !is null)
			e = parentDocument.createElement("th");
		else
			e = Element.make("th");
		static if(is(T == Html))
			e.innerHTML = t;
		else
			e.innerText = to!string(t);
		return e;
	}

	/// ditto
	Element td(T)(T t) {
		Element e;
		if(parentDocument !is null)
			e = parentDocument.createElement("td");
		else
			e = Element.make("td");
		static if(is(T == Html))
			e.innerHTML = t;
		else
			e.innerText = to!string(t);
		return e;
	}

	/// Appends a row of `th` cells to the `thead` child, creating that child if it is missing.
	Element appendHeaderRow(T...)(T t) {
		return appendRowInternal("th", "thead", t);
	}

	/// Appends a row of `td` cells to the `tfoot` child, creating that child if it is missing.
	Element appendFooterRow(T...)(T t) {
		return appendRowInternal("td", "tfoot", t);
	}

	/// Appends a row of `td` cells to the `tbody` child, creating that child if it is missing.
	Element appendRow(T...)(T t) {
		return appendRowInternal("td", "tbody", t);
	}

	/// Adds `classes[i]` to the cell in column `i` of every row of the grid.
	/// Empty class names and columns beyond the end of a row are skipped.
	void addColumnClasses(string[] classes...) {
		auto grid = getGrid();
		foreach(row; grid)
			foreach(i, cl; classes) {
				if(cl.length == 0 || i >= row.length)
					continue;
				// getGrid pads ragged rows with null cells; those have nothing to mark
				if(row[i] is null)
					continue;
				row[i].addClass(cl);
			}
	}

	/// Builds a `tr` from the arguments and appends it to the first direct child whose tag
	/// name is `findType`, adding such a child if there is none.
	///
	/// Argument handling, per argument:
	/// $(LIST
	///   * an `Element` that is already a `td`/`th` is used as the cell itself; any other
	///     element is wrapped in a new `innerType` cell
	///   * `Html` becomes the cell's inner HTML
	///   * `Element[]` becomes one cell holding all the elements
	///   * `string[]` becomes one cell per string
	///   * anything else is converted with `to!string` and becomes the cell's inner text
	/// )
	/// Returns: the new row.
	private Element appendRowInternal(T...)(string innerType, string findType, T t) {
		Element row = Element.make("tr");

		foreach(e; t) {
			static if(is(typeof(e) : Element)) {
				if(e.tagName == "td" || e.tagName == "th")
					row.appendChild(e);
				else {
					Element a = Element.make(innerType);

					a.appendChild(e);

					row.appendChild(a);
				}
			} else static if(is(typeof(e) == Html)) {
				Element a = Element.make(innerType);
				a.innerHTML = e.source;
				row.appendChild(a);
			} else static if(is(typeof(e) == Element[])) {
				Element a = Element.make(innerType);
				foreach(ele; e)
					a.appendChild(ele);
				row.appendChild(a);
			} else static if(is(typeof(e) == string[])) {
				foreach(ele; e) {
					Element a = Element.make(innerType);
					a.innerText = to!string(ele);
					row.appendChild(a);
				}
			} else {
				Element a = Element.make(innerType);
				a.innerText = to!string(e);
				row.appendChild(a);
			}
		}

		foreach(e; children) {
			if(e.tagName == findType) {
				e.appendChild(row);
				return row;
			}
		}

		// the type was not found if we are here... let's add it so it is well-formed
		auto lol = this.addChild(findType);
		lol.appendChild(row);

		return row;
	}

	/// Returns the first direct `caption` child, appending a new empty one if there is none.
	Element captionElement() {
		Element cap;
		foreach(c; children) {
			if(c.tagName == "caption") {
				cap = c;
				break;
			}
		}

		if(cap is null) {
			cap = Element.make("caption");
			appendChild(cap);
		}

		return cap;
	}

	/// The caption's text. Reading it creates the caption element if it does not exist yet.
	@property string caption() {
		return captionElement().innerText;
	}

	///.
/// Sets the caption's text, creating the caption element first if it does not exist yet.
	@property void caption(string text) {
		captionElement().innerText = text;
	}

	/// Gets the logical layout of the table as a rectangular grid of
	/// cells. It considers rowspan and colspan. A cell with a large
	/// span is represented in the grid by being referenced several times.
	/// The tablePortition parameter can get just a <thead>, <tbody>, or
	/// <tfoot> portion if you pass one.
	///
	/// Note: the rectangular grid might include null cells.
	///
	/// This is kinda expensive so you should call once when you want the grid,
	/// then do lookups on the returned array.
	TableCell[][] getGrid(Element tablePortition = null)
		in {
			// a non-null portion must be a thead/tbody/tfoot that is a direct child of this table
			if(tablePortition is null)
				assert(tablePortition is null);
			else {
				assert(tablePortition !is null);
				assert(tablePortition.parentNode is this);
				assert(
					tablePortition.tagName == "tbody"
					||
					tablePortition.tagName == "tfoot"
					||
					tablePortition.tagName == "thead"
				);
			}
		}
	do {
		// no portion given: use the whole table
		if(tablePortition is null)
			tablePortition = this;

		TableCell[][] ret;

		// FIXME: will also return rows of sub tables!
		auto rows = tablePortition.getElementsByTagName("tr");
		ret.length = rows.length;

		// length of the longest row seen so far; used for the final padding
		int maxLength = 0;

		// Stores `cell` in grid row `row`. A position of -1 means "first null slot of that
		// row, or its end"; otherwise the row is grown with nulls as needed so that
		// `position` exists. Returns the position that was used.
		int insertCell(int row, int position, TableCell cell) {
			if(row >= ret.length)
				return position; // not supposed to happen - a rowspan is prolly too big.

			if(position == -1) {
				position++;
				foreach(item; ret[row]) {
					if(item is null)
						break;
					position++;
				}
			}

			if(position < ret[row].length)
				ret[row][position] = cell;
			else
				foreach(i; ret[row].length .. position + 1) {
					if(i == position)
						ret[row] ~= cell;
					else
						ret[row] ~= null;
				}
			return position;
		}

		foreach(i, rowElement; rows) {
			// every tr found is required to be a TableRow instance
			auto row = cast(TableRow) rowElement;
			assert(row !is null);
			assert(i < ret.length);

			// column counter of this row; advanced once per column a cell spans
			int position = 0;
			foreach(cellElement; rowElement.childNodes) {
				// child nodes that are not TableCell instances are skipped
				auto cell = cast(TableCell) cellElement;
				if(cell is null)
					continue;

				// FIXME: colspan == 0 or rowspan == 0
				// is supposed to mean fill in the rest of
				// the table, not skip it
				foreach(int j; 0 .. cell.colspan) {
					foreach(int k; 0 .. cell.rowspan)
						// if the first row, always append.
						// NOTE(review): rows below (k > 0) are written at this row's running
						// `position`, not at the slot insertCell picked for k == 0 - confirm
						// this is intended when earlier rowspans already occupy columns.
						insertCell(k + cast(int) i, k == 0 ? -1 : position, cell);
					position++;
				}
			}

			if(ret[i].length > maxLength)
				maxLength = cast(int) ret[i].length;
		}

		// want to ensure it's rectangular
		foreach(ref r; ret) {
			foreach(i; r.length .. maxLength)
				r ~= null;
		}

		return ret;
	}
}

/// Represents a table row element - a <tr>
/// Group: implementations
class TableRow : Element {
	/// Creates the element and sets its tag name to `tr`.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "tr";
	}

	// FIXME: the standard says there should be a lot more in here,
	// but meh, I never use it and it's a pain to implement.
}

/// Represents anything that can be a table cell - <td> or <th> html.
/// Group: implementations
class TableCell : Element {
	///.
this(Document _parentDocument, string _tagName) {
		super(_parentDocument, _tagName);
	}

	/// Number of rows this cell spans, read from the `rowspan` attribute.
	/// A missing, empty, negative or unparsable attribute yields 1.
	@property int rowspan() const {
		return spanAttribute("rowspan");
	}

	/// Number of columns this cell spans, read from the `colspan` attribute.
	/// A missing, empty, negative or unparsable attribute yields 1.
	@property int colspan() const {
		return spanAttribute("colspan");
	}

	/// Sets the `rowspan` attribute and returns the value that was set.
	@property int rowspan(int i) {
		setAttribute("rowspan", to!string(i));
		return i;
	}

	/// Sets the `colspan` attribute and returns the value that was set.
	@property int colspan(int i) {
		setAttribute("colspan", to!string(i));
		return i;
	}

	// Reads a span attribute leniently. Markup found in the wild contains values such as
	// "2 ", "" or "auto"; handing those to to!int unguarded made every caller of
	// rowspan/colspan (including Table.getGrid) throw a ConvException. Such values fall
	// back to the default span of 1 instead. An explicit 0 is passed through unchanged.
	private int spanAttribute(string name) const {
		auto raw = getAttribute(name).strip;
		if(raw.length == 0)
			return 1;
		try {
			immutable parsed = to!int(raw);
			return parsed < 0 ? 1 : parsed;
		} catch(ConvException) {
			return 1;
		}
	}

}


/// Exception type for markup problems; it adds nothing beyond `Exception` itself.
/// Group: implementations
class MarkupException : Exception {

	/// Forwards message, file and line to `Exception`.
	this(string message, string file = __FILE__, size_t line = __LINE__) {
		super(message, file, line);
	}
}

/// This is used when you are using one of the require variants of navigation, and no matching element can be found in the tree.
/// Group: implementations
class ElementNotFoundException : Exception {

	/// type == kind of element you were looking for and search == a selector describing the search.
	this(string type, string search, Element searchContext, string file = __FILE__, size_t line = __LINE__) {
		this.searchContext = searchContext;
		super("Element of type '"~type~"' matching {"~search~"} not found.", file, line);
	}

	/// The element passed to the constructor as `searchContext`.
	Element searchContext;
}

/// The html struct is used to differentiate between regular text nodes and html in certain functions
///
/// Easiest way to construct it is like this: `auto html = Html("<p>hello</p>");`
/// Group: core_functionality
struct Html {
	/// This string holds the actual html. Use it to retrieve the contents.
string source;
}

// for the observers
enum DomMutationOperations {
	setAttribute,
	removeAttribute,
	appendChild, // tagname, attributes[], innerHTML
	insertBefore,
	truncateChildren,
	removeChild,
	appendHtml,
	replaceHtml,
	appendText,
	replaceText,
	replaceTextOnly
}

// and for observers too
struct DomMutationEvent {
	DomMutationOperations operation;
	Element target;
	Element related; // what this means differs with the operation
	Element related2;
	string relatedString;
	string relatedString2;
}


private immutable static string[] htmlSelfClosedElements = [
	// html 4
	"img", "hr", "input", "br", "col", "link", "meta",
	// html 5
	"source" ];

private immutable static string[] htmlInlineElements = [
	"span", "strong", "em", "b", "i", "a"
];


static import std.conv;

/// Converts a string of hexadecimal digits to an int.
///
/// Both lowercase and uppercase digits are accepted, so `"ff"` and `"FF"` both give 255.
/// An empty string gives 0. There is no overflow check; values beyond the range of
/// `int` wrap around.
///
/// Throws: `Exception` if the string contains anything other than `0-9`, `a-f` or `A-F`.
int intFromHex(string hex) {
	int place = 1;
	int value = 0;
	// walk from the least significant digit, scaling by 16 at every step
	for(sizediff_t a = hex.length - 1; a >= 0; a--) {
		int v;
		char q = hex[a];
		if( q >= '0' && q <= '9')
			v = q - '0';
		else if (q >= 'a' && q <= 'f')
			v = q - 'a' + 10;
		else if (q >= 'A' && q <= 'F')
			v = q - 'A' + 10; // uppercase digits are just as valid
		else throw new Exception("Illegal hex character: " ~ q);

		value += v * place;

		place *= 16;
	}

	return value;
}


// CSS selector handling

// EXTENSIONS
// dd - dt means get the dt directly before that dd (opposite of +)                  NOT IMPLEMENTED
// dd -- dt means rewind siblings until you hit a dt, go as far as you need to       NOT IMPLEMENTED
// dt < dl means get the parent of that dt iff it is a dl (usable for "get a dt that are direct children of dl")
// dt << dl  means go as far up as needed to find a dl (you have an element and want its containers)      NOT IMPLEMENTED
// :first  means to stop at the first hit, don't do more (so p + p == p ~ p:first



//
// CSS4 draft currently says you can change the subject (the element actually returned) by putting a ! at the end of it.
// That might be useful to implement, though I do have parent selectors too.

/// Every punctuation token the selector lexer recognizes.
static immutable string[] selectorTokens = [
	// It is important that the 2 character possibilities go first here for accurate lexing
	"~=", "*=", "|=", "^=", "$=", "!=",
	"::", ">>",
	"<<", // my any-parent extension (reciprocal of whitespace)
	// " - ", // previous-sibling extension (whitespace required to disambiguate tag-names)
	".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<", "(", ")"
]; // other is white space or a name.

/// Returns the index into [selectorTokens] of the token that starts at `str[position]`,
/// or -1 if no token starts there. `position` must be a valid index into `str`.
sizediff_t idToken(string str, sizediff_t position) {
	sizediff_t tid = -1;
	char c = str[position];
	foreach(a, token; selectorTokens)
		if(c == token[0]) {
			if(token.length > 1) {
				if(position + 1 >= str.length || str[position+1] != token[1])
					continue; // not this token
			}
			tid = a;
			break;
		}
	return tid;
}

/// Splits a selector string into tokens: punctuation from [selectorTokens], quoted
/// strings (returned without their quotes) and names (with backslash escapes resolved).
///
/// Control characters and `/* */` comments between tokens are dropped. A space is NOT
/// dropped; it is a token of its own because it is the descendant combinator.
///
/// Throws: `Exception` if a quoted string is never closed.
// look, ma, no phobos!
// new lexer by ketmar
string[] lexSelector (string selstr) {

	// index of the token starting at str[stpos], or -1
	static sizediff_t idToken (string str, size_t stpos) {
		char c = str[stpos];
		foreach (sizediff_t tidx, immutable token; selectorTokens) {
			if (c == token[0]) {
				if (token.length > 1) {
					assert(token.length == 2, token); // we don't have 3-char tokens yet
					if (str.length-stpos < 2 || str[stpos+1] != token[1]) continue;
				}
				return tidx;
			}
		}
		return -1;
	}

	// skip spaces and comments
	static string removeLeadingBlanks (string str) {
		size_t curpos = 0;
		while (curpos < str.length) {
			immutable char ch = str[curpos];
			// this can overflow on 4GB strings on 32-bit; 'cmon, don't be silly, nobody cares!
			if (ch == '/' && str.length-curpos > 1 && str[curpos+1] == '*') {
				// comment
				curpos += 2;
				while (curpos < str.length) {
					if (str[curpos] == '*' && str.length-curpos > 1 && str[curpos+1] == '/') {
						curpos += 2;
						break;
					}
					++curpos;
				}
			} else if (ch < 32) { // The < instead of <= is INTENTIONAL. See note from adr below.
				++curpos;

				// FROM ADR: This does NOT catch ' '! Spaces have semantic meaning in CSS! While
				// "foo bar" is clear, and can only have one meaning, consider ".foo .bar".
				// That is not the same as ".foo.bar". If the space is stripped, important
				// information is lost, despite the tokens being separatable anyway.
				//
				// The parser really needs to be aware of the presence of a space.
			} else {
				break;
			}
		}
		return str[curpos..$];
	}

	static bool isBlankAt() (string str, size_t pos) {
		// we should consider unicode spaces too, but... unicode sux anyway.
		return
			(pos < str.length && // in string
			 (str[pos] <= 32 || // space
			  (str.length-pos > 1 && str[pos] == '/' && str[pos+1] == '*'))); // comment
	}

	string[] tokens;
	// lexx it!
	while ((selstr = removeLeadingBlanks(selstr)).length > 0) {
		if(selstr[0] == '\"' || selstr[0] == '\'') {
			auto end = selstr[0];
			auto pos = 1;
			bool escaping;
			// Scan to the closing quote. A character that follows a backslash never ends
			// the string, so the loop has to keep going while `escaping` is set. Stopping
			// on `escaping` cut the token off at the first backslash.
			while(pos < selstr.length && (escaping || selstr[pos] != end)) {
				if(escaping)
					escaping = false;
				else if(selstr[pos] == '\\')
					escaping = true;
				pos++;
			}

			// running off the end means the closing quote is missing
			if(pos >= selstr.length)
				throw new Exception("unterminated string in selector: " ~ selstr);

			// FIXME: do better unescaping
			tokens ~= selstr[1 .. pos].replace(`\"`, `"`).replace(`\'`, `'`).replace(`\\`, `\`);
			// skip the closing quote; it is fine for the string to be the last thing in the input
			selstr = selstr[pos + 1.. $];
			continue;
		}


		// no tokens starts with escape
		immutable tid = idToken(selstr, 0);
		if (tid >= 0) {
			// special token
			tokens ~= selectorTokens[tid]; // it's funnier this way
			selstr = selstr[selectorTokens[tid].length..$];
			continue;
		}
		// from start to space or special token
		size_t escapePos = size_t.max;
		size_t curpos = 0; // i can has chizburger^w escape at the start
		while (curpos < selstr.length) {
			if (selstr[curpos] == '\\') {
				// this is escape, just skip it and next char
				if (escapePos == size_t.max) escapePos = curpos;
				curpos = (selstr.length-curpos >= 2 ? curpos+2 : selstr.length);
			} else {
				if (isBlankAt(selstr, curpos) || idToken(selstr, curpos) >= 0) break;
				++curpos;
			}
		}
		// identifier
		if (escapePos != size_t.max) {
			// i hate it when it happens
			string id = selstr[0..escapePos];
			while (escapePos < curpos) {
				if (curpos-escapePos < 2) break;
				id ~= selstr[escapePos+1]; // escaped char
				escapePos += 2;
				immutable stp = escapePos;
				while (escapePos < curpos && selstr[escapePos] != '\\') ++escapePos;
				if (escapePos > stp) id ~= selstr[stp..escapePos];
			}
			if (id.length > 0) tokens ~= id;
		} else {
			tokens ~= selstr[0..curpos];
		}
		selstr = selstr[curpos..$];
	}
	return tokens;
}
version(unittest_domd_lexer) unittest {
	assert(lexSelector(r" test\=me  /*d*/") == [r"test=me"]);
	assert(lexSelector(r"div/**/. id") == ["div", ".", "id"]);
	assert(lexSelector(r" < <") == ["<", "<"]);
	assert(lexSelector(r" <<") == ["<<"]);
	assert(lexSelector(r" <</") == ["<<", "/"]);
	assert(lexSelector(r" <</*") == ["<<"]);
	assert(lexSelector(r" <\</*") == ["<", "<"]);
	assert(lexSelector(r"heh\") == ["heh"]);
	assert(lexSelector(r"alice \") == ["alice"]);
	assert(lexSelector(r"alice,is#best") == ["alice", ",", "is", "#", "best"]);
}

/// One compound selector together with the combinator (`separation`) that precedes it.
struct SelectorPart {
	string tagNameFilter; ///.
	string[] attributesPresent; /// [attr]
	string[2][] attributesEqual; /// [attr=value]
	string[2][] attributesStartsWith; /// [attr^=value]
	string[2][] attributesEndsWith; /// [attr$=value]
	// split it on space, then match to these
	string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value]
	// split it on dash, then match to these
	string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value]
	string[2][] attributesInclude; /// [attr*=value]
	string[2][] attributesNotEqual; /// [attr!=value] -- extension by me

	string[] hasSelectors; /// :has(this)
	string[] notSelectors; /// :not(this)

	string[] isSelectors; /// :is(this)
	string[] whereSelectors; /// :where(this)

	ParsedNth[] nthOfType; /// .
	ParsedNth[] nthLastOfType; /// .
	ParsedNth[] nthChild; /// .

	bool firstChild; ///.
	bool lastChild; ///.

	bool firstOfType; /// .
	bool lastOfType; /// .

	bool emptyElement; ///.
	bool whitespaceOnly; ///
	bool oddChild; ///.
	bool evenChild; ///.

	bool scopeElement; /// the css :scope thing; matches just the `this` element. NOT IMPLEMENTED

	bool rootElement; ///.
7090 7091 int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf 7092 7093 bool isCleanSlateExceptSeparation() { 7094 auto cp = this; 7095 cp.separation = -1; 7096 return cp is SelectorPart.init; 7097 } 7098 7099 ///. 7100 string toString() { 7101 string ret; 7102 switch(separation) { 7103 default: assert(0); 7104 case -1: break; 7105 case 0: ret ~= " "; break; 7106 case 1: ret ~= " > "; break; 7107 case 2: ret ~= " + "; break; 7108 case 3: ret ~= " ~ "; break; 7109 case 4: ret ~= " < "; break; 7110 } 7111 ret ~= tagNameFilter; 7112 foreach(a; attributesPresent) ret ~= "[" ~ a ~ "]"; 7113 foreach(a; attributesEqual) ret ~= "[" ~ a[0] ~ "=\"" ~ a[1] ~ "\"]"; 7114 foreach(a; attributesEndsWith) ret ~= "[" ~ a[0] ~ "$=\"" ~ a[1] ~ "\"]"; 7115 foreach(a; attributesStartsWith) ret ~= "[" ~ a[0] ~ "^=\"" ~ a[1] ~ "\"]"; 7116 foreach(a; attributesNotEqual) ret ~= "[" ~ a[0] ~ "!=\"" ~ a[1] ~ "\"]"; 7117 foreach(a; attributesInclude) ret ~= "[" ~ a[0] ~ "*=\"" ~ a[1] ~ "\"]"; 7118 foreach(a; attributesIncludesSeparatedByDashes) ret ~= "[" ~ a[0] ~ "|=\"" ~ a[1] ~ "\"]"; 7119 foreach(a; attributesIncludesSeparatedBySpaces) ret ~= "[" ~ a[0] ~ "~=\"" ~ a[1] ~ "\"]"; 7120 7121 foreach(a; notSelectors) ret ~= ":not(" ~ a ~ ")"; 7122 foreach(a; hasSelectors) ret ~= ":has(" ~ a ~ ")"; 7123 7124 foreach(a; isSelectors) ret ~= ":is(" ~ a ~ ")"; 7125 foreach(a; whereSelectors) ret ~= ":where(" ~ a ~ ")"; 7126 7127 foreach(a; nthChild) ret ~= ":nth-child(" ~ a.toString ~ ")"; 7128 foreach(a; nthOfType) ret ~= ":nth-of-type(" ~ a.toString ~ ")"; 7129 foreach(a; nthLastOfType) ret ~= ":nth-last-of-type(" ~ a.toString ~ ")"; 7130 7131 if(firstChild) ret ~= ":first-child"; 7132 if(lastChild) ret ~= ":last-child"; 7133 if(firstOfType) ret ~= ":first-of-type"; 7134 if(lastOfType) ret ~= ":last-of-type"; 7135 if(emptyElement) ret ~= ":empty"; 7136 if(whitespaceOnly) ret ~= ":whitespace-only"; 7137 
if(oddChild) ret ~= ":odd-child";
		if(evenChild) ret ~= ":even-child";
		if(rootElement) ret ~= ":root";
		if(scopeElement) ret ~= ":scope";

		return ret;
	}

	// USEFUL
	/++
		Tests whether `e` satisfies every filter stored in this selector part:
		tag name, structural pseudo-classes, attribute comparisons, the
		functional selectors (`:has`, `:not`, `:is`, `:where`) and the nth-* forms.

		Separators between parts are NOT considered here; see
		[SelectorComponent.matchElement] for that.

		Returns: `false` for `null` and for nodes whose `nodeType` is not 1.
	+/
	bool matchElement(Element e) {
		// FIXME: this can be called a lot of times, and really add up in times according to the profiler.
		// Each individual call is reasonably fast already, but it adds up.
		if(e is null) return false;
		if(e.nodeType != 1) return false;

		if(tagNameFilter != "" && tagNameFilter != "*")
			if(e.tagName != tagNameFilter)
				return false;
		if(firstChild) {
			if(e.parentNode is null)
				return false;
			if(e.parentNode.childElements[0] !is e)
				return false;
		}
		if(lastChild) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			if(ce[$-1] !is e)
				return false;
		}
		if(firstOfType) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			foreach(c; ce) {
				if(c.tagName == e.tagName) {
					// the first sibling with our tag name must be us. On success
					// we only stop scanning; the remaining filters below still
					// have to pass (returning true here would skip them).
					if(c !is e)
						return false;
					break;
				}
			}
		}
		if(lastOfType) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			foreach_reverse(c; ce) {
				if(c.tagName == e.tagName) {
					// same as above, scanning from the end
					if(c !is e)
						return false;
					break;
				}
			}
		}
		/+
		if(scopeElement) {
			if(e !is this_)
				return false;
		}
		+/
		if(emptyElement) {
			if(e.children.length)
				return false;
		}
		if(whitespaceOnly) {
			if(e.innerText.strip.length)
				return false;
		}
		if(rootElement) {
			if(e.parentNode !is null)
				return false;
		}
		if(oddChild || evenChild) {
			if(e.parentNode is null)
				return false;
			// note: i is the zero-based position among the parent's child elements
			foreach(i, child; e.parentNode.childElements) {
				if(child is e) {
					if(oddChild && !(i&1))
						return false;
					if(evenChild && (i&1))
						return false;
					break;
				}
			}
		}

		// true if `value` is one of the pieces of `attr` when split on `separator`
		bool matchWithSeparator(string attr, string value, string separator) {
			foreach(s; attr.split(separator))
				if(s == value)
					return true;
			return false;
		}

		foreach(a; attributesPresent)
			if(a !in e.attributes)
				return false;
		foreach(a; attributesEqual)
			if(a[0] !in e.attributes || e.attributes[a[0]] != a[1])
				return false;
		foreach(a; attributesNotEqual)
			// FIXME: maybe it should say null counts... this just bit me.
			// I did [attr][attr!=value] to work around.
			//
			// if it's null, it's not equal, right?
			//if(a[0] !in e.attributes || e.attributes[a[0]] == a[1])
			if(e.getAttribute(a[0]) == a[1])
				return false;
		foreach(a; attributesInclude)
			if(a[0] !in e.attributes || (e.attributes[a[0]].indexOf(a[1]) == -1))
				return false;
		foreach(a; attributesStartsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].startsWith(a[1]))
				return false;
		foreach(a; attributesEndsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].endsWith(a[1]))
				return false;
		foreach(a; attributesIncludesSeparatedBySpaces)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], " "))
				return false;
		foreach(a; attributesIncludesSeparatedByDashes)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], "-"))
				return false;
		foreach(a; hasSelectors) {
			if(e.querySelector(a) is null)
				return false;
		}
		foreach(a; notSelectors) {
			auto sel = Selector(a);
			if(sel.matchesElement(e))
				return false;
		}
		foreach(a; isSelectors) {
			auto sel = Selector(a);
			if(!sel.matchesElement(e))
				return false;
		}
		foreach(a; whereSelectors) {
			auto sel = Selector(a);
			if(!sel.matchesElement(e))
				return false;
		}

		foreach(a; nthChild) {
			if(e.parentNode is null)
				return false;

			auto among = e.parentNode.childElements;

			if(!a.solvesFor(among, e))
				return false;
		}
		foreach(a; nthOfType) {
			if(e.parentNode is null)
				return false;

			auto among = e.parentNode.childElements(e.tagName);

			if(!a.solvesFor(among, e))
				return false;
		}
		foreach(a; nthLastOfType) {
			if(e.parentNode is null)
				return false;

			auto among = retro(e.parentNode.childElements(e.tagName));

			if(!a.solvesFor(among, e))
				return false;
		}

		return true;
	}
}

/++
	The parsed argument of an nth-* pseudo-class, i.e. the `An+B [of selector]`
	expression. `multiplier` is A, `adder` is B and `of` is the optional
	trailing selector text (empty if none was given).
+/
struct ParsedNth {
	int multiplier;
	int adder;

	string of;

	/++
		Parses `text`. Accepts `odd`, `even`, a plain integer, or the
		`An`, `An+B`, `An-B` forms, each optionally followed by `of <selector>`.

		Throws: Exception if something other than `+`, `-` or `of` follows the `n`.
	+/
	this(string text) {
		auto original = text;
		consumeWhitespace(text);
		if(text.startsWith("odd")) {
			multiplier = 2;
			adder = 1;

			text = text[3 .. $];
		} else if(text.startsWith("even")) {
			// even is 2n+0; an adder of 1 would make it identical to odd
			multiplier = 2;
			adder = 0;

			text = text[4 .. $];
		} else {
			int n;
			if(text.length && text[0] == 'n') {
				n = 1;
			} else if(text.length > 1 && (text[0] == '-' || text[0] == '+') && text[1] == 'n') {
				// a bare sign directly before n ("-n+3") means a coefficient of -1 / +1.
				// parseNumber would report 0 here since there are no digits.
				n = (text[0] == '-') ? -1 : 1;
				text = text[1 .. $];
			} else {
				n = parseNumber(text);
			}
			consumeWhitespace(text);
			if(text.length && text[0] == 'n') {
				multiplier = n;
				text = text[1 .. $];
				consumeWhitespace(text);
				if(text.length) {
					if(text[0] == '+') {
						text = text[1 .. $];
						adder = parseNumber(text);
					} else if(text[0] == '-') {
						text = text[1 .. $];
						adder = -parseNumber(text);
					} else if(text[0] == 'o') {
						// continue, this is handled below
					} else
						throw new Exception("invalid css string at " ~ text ~ " in " ~ original);
				}
			} else {
				// no n at all: just a fixed position
				adder = n;
			}
		}

		consumeWhitespace(text);
		if(text.startsWith("of")) {
			text = text[2 .. $];
			consumeWhitespace(text);
			of = text[0 .. $];
		}
	}

	/// Formats the parsed values back into `An+B of selector` form.
	string toString() {
		return format("%dn%s%d%s%s", multiplier, adder >= 0 ? "+" : "", adder, of.length ? " of " : "", of);
	}

	/++
		Determines whether `e` sits at a position in `elements` that satisfies
		this expression.

		Positions are one-based and, if `of` is set, only elements matching
		that selector are counted. Returns `false` if `e` is not found.
	+/
	bool solvesFor(R)(R elements, Element e) {
		int idx = 1;
		bool found = false;

		// parse the "of" selector once rather than once per element
		Selector ofSelector;
		if(of.length)
			ofSelector = Selector(of);

		foreach(ele; elements) {
			if(of.length && !ofSelector.matchesElement(ele))
				continue;
			if(ele is e) {
				found = true;
				break;
			}
			idx++;
		}
		if(!found) return false;

		// multiplier * n + adder = idx
		// if there is a solution for a non-negative integral n, it matches

		int diff = idx - adder;
		if(multiplier == 0)
			return diff == 0;
		if(diff % multiplier != 0)
			return false;
		// n counts up from zero, so e.g. "n+3" must not match positions 1 and 2
		return diff / multiplier >= 0;
	}

	// strips leading spaces from text, in place
	private void consumeWhitespace(ref string text) {
		while(text.length && text[0] == ' ')
			text = text[1 .. $];
	}

	// reads an optionally signed run of decimal digits off the front of text,
	// advancing it. Yields 0 if there are no digits.
	private int parseNumber(ref string text) {
		consumeWhitespace(text);
		if(text.length == 0) return 0;
		bool negative = text[0] == '-';
		if(text[0] == '+')
			text = text[1 .. $];
		if(negative) text = text[1 .. $];
		int i = 0;
		while(i < text.length && (text[i] >= '0' && text[i] <= '9'))
			i++;
		if(i == 0)
			return 0;
		int cool = to!int(text[0 .. i]);
		text = text[i .. $];
		return negative ? -cool : cool;
	}
}

// USEFUL
/++
	Recursively collects the elements reached from `start` by following
	`parts` in order. Each part's `separation` selects how candidates are
	found relative to `start` (descendants, children, siblings, parent).

	The result may contain duplicates; see [removeDuplicates].
+/
Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts) {
	Element[] ret;
	if(!parts.length) {
		return [start]; // the null selector only matches the start point; it
				// is what terminates the recursion
	}

	auto part = parts[0];
	//writeln("checking ", part, " against ", start, " with ", part.separation);
	switch(part.separation) {
		default: assert(0);
		case -1:
		case 0: // tree
			foreach(e; start.tree) {
				if(part.separation == 0 && start is e)
					continue; // space doesn't match itself!
				if(part.matchElement(e)) {
					ret ~= getElementsBySelectorParts(e, parts[1..$]);
				}
			}
		break;
		case 1: // children
			foreach(e; start.childNodes) {
				if(part.matchElement(e)) {
					ret ~= getElementsBySelectorParts(e, parts[1..$]);
				}
			}
		break;
		case 2: // next-sibling
			auto e = start.nextSibling("*");
			if(part.matchElement(e))
				ret ~= getElementsBySelectorParts(e, parts[1..$]);
		break;
		case 3: // younger sibling
			auto tmp = start.parentNode;
			if(tmp !is null) {
				sizediff_t pos = -1;
				auto children = tmp.childElements;
				foreach(i, child; children) {
					if(child is start) {
						pos = i;
						break;
					}
				}
				assert(pos != -1);
				foreach(e; children[pos+1..$]) {
					if(part.matchElement(e))
						ret ~= getElementsBySelectorParts(e, parts[1..$]);
				}
			}
		break;
		case 4: // immediate parent node, an extension of mine to walk back up the tree
			auto e = start.parentNode;
			if(part.matchElement(e)) {
				ret ~= getElementsBySelectorParts(e, parts[1..$]);
			}
			/*
				Example of usefulness:

				Consider you have an HTML table. If you want to get all rows that have a th, you can do:

				table th < tr

				Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes
			*/
		break;
		case 5: // any parent node, another extension of mine to go up the tree (backward of the whitespace operator)
			/*
				Like with the < operator, this is best used to find some parent of a particular known element.

				Say you have an anchor inside a
			*/
			// not implemented; contributes nothing to the result
		break;
	}

	return ret;
}

/++
	Represents a parsed CSS selector. You never have to use this directly, but you can if you know it is going to be reused a lot to avoid a bit of repeat parsing.

	See_Also:
		$(LIST
			* [Element.querySelector]
			* [Element.querySelectorAll]
			* [Element.matches]
			* [Element.closest]
			* [Document.querySelector]
			* [Document.querySelectorAll]
		)
+/
/// Group: core_functionality
struct Selector {
	SelectorComponent[] components;
	string original;
	/++
		Parses the selector string and constructs the usable structure.
	+/
	this(string cssSelector) {
		components = parseSelectorString(cssSelector);
		original = cssSelector;
	}

	/++
		Returns true if the given element matches this selector,
		considered relative to an arbitrary element.

		You can do a form of lazy [Element.querySelectorAll|querySelectorAll] by using this
		with [std.algorithm.iteration.filter]:

		---
		Selector sel = Selector("foo > bar");
		auto lazySelectorRange = document.root.tree.filter!(e => sel.matchesElement(e));
		---
	+/
	bool matchesElement(Element e, Element relativeTo = null) {
		// a comma-separated selector matches if any one of its components does
		foreach(component; components)
			if(component.matchElement(e, relativeTo))
				return true;

		return false;
	}

	/++
		Reciprocal of [Element.querySelectorAll]
	+/
	Element[] getMatchingElements(Element start) {
		Element[] ret;
		foreach(component; components)
			ret ~= getElementsBySelectorParts(start, component.parts);
		return removeDuplicates(ret);
	}

	/++
		Like [getMatchingElements], but returns a lazy range. Be careful
		about mutating the dom as you iterate through this.
	+/
	auto getMatchingElementsLazy(Element start, Element relativeTo = null) {
		import std.algorithm.iteration;
		return start.tree.filter!(a => this.matchesElement(a, relativeTo));
	}


	/// Returns the string this was built from
	string toString() {
		return original;
	}

	/++
		Returns a string from the parsed result


		(may not match the original, this is mostly for debugging right now but in the future might be useful for pretty-printing)
	+/
	string parsedToString() {
		string ret;

		foreach(idx, component; components) {
			if(idx) ret ~= ", ";
			ret ~= component.toString();
		}

		return ret;
	}
}

/// One comma-free selector: a sequence of [SelectorPart]s, each carrying the separator that precedes it.
struct SelectorComponent {
	///.
	SelectorPart[] parts;

	/// Concatenates the string form of each part.
	string toString() {
		string ret;
		foreach(part; parts)
			ret ~= part.toString();
		return ret;
	}

	// USEFUL
	/// Returns the elements reachable from `start` through this component's parts, without duplicates.
	Element[] getElements(Element start) {
		return removeDuplicates(getElementsBySelectorParts(start, parts));
	}

	// USEFUL (but not implemented)
	/// If relativeTo == null, it assumes the root of the parent document.
	///
	/// Works backward: the last part is tested against `e` itself, then each
	/// earlier part is tested against the parent / ancestor / previous sibling
	/// dictated by the separator of the part after it.
	bool matchElement(Element e, Element relativeTo = null) {
		if(e is null) return false;
		Element where = e;
		int lastSeparation = -1;

		auto lparts = parts;

		if(parts.length && parts[0].separation > 0) {
			// if it starts with a non-trivial separator, inject
			// a "*" matcher to act as a root. for cases like document.querySelector("> body")
			// which implies html

			// there is probably a MUCH better way to do this.
			auto dummy = SelectorPart.init;
			dummy.tagNameFilter = "*";
			dummy.separation = 0;
			lparts = dummy ~ lparts;
		}

		foreach(part; retro(lparts)) {

			// writeln("matching ", where, " with ", part, " via ", lastSeparation);
			// writeln(parts);

			if(lastSeparation == -1) {
				if(!part.matchElement(where))
					return false;
			} else if(lastSeparation == 0) { // generic parent
				// need to go up the whole chain
				where = where.parentNode;

				while(where !is null) {
					if(part.matchElement(where))
						break;

					if(where is relativeTo)
						return false;

					where = where.parentNode;
				}

				if(where is null)
					return false;
			} else if(lastSeparation == 1) { // the > operator
				where = where.parentNode;

				if(!part.matchElement(where))
					return false;
			} else if(lastSeparation == 2) { // the + operator
			//writeln("WHERE", where, " ", part);
				where = where.previousSibling("*");

				if(!part.matchElement(where))
					return false;
			} else if(lastSeparation == 3) { // the ~ operator
				where = where.previousSibling("*");
				while(where !is null) {
					if(part.matchElement(where))
						break;

					if(where is relativeTo)
						return false;

					where = where.previousSibling("*");
				}

				if(where is null)
					return false;
			} else if(lastSeparation == 4) { // my bad idea extension < operator, don't use this anymore
				// FIXME
			}

			lastSeparation = part.separation;

			if(where is relativeTo)
				return false; // at end of line, if we aren't done by now, the match fails
		}
		return true; // if we got here, it is a success
	}

	// the string should NOT have commas. Use parseSelectorString for that instead
	///.
	static SelectorComponent fromString(string selector) {
		return parseSelector(lexSelector(selector));
	}
}

///.
/++
	Lexes `selector` and splits the token stream on top-level commas (commas
	inside parentheses are left alone), parsing each piece with [parseSelector].

	Params:
		selector = the CSS selector text, possibly a comma-separated list
		caseSensitiveTags = forwarded to [parseSelector]
+/
SelectorComponent[] parseSelectorString(string selector, bool caseSensitiveTags = true) {
	SelectorComponent[] ret;
	auto tokens = lexSelector(selector); // this will parse commas too
	// and now do comma-separated slices (i haz phobosophobia!)
	int parensCount = 0;
	while (tokens.length > 0) {
		size_t end = 0;
		while (end < tokens.length && (parensCount > 0 || tokens[end] != ",")) {
			if(tokens[end] == "(") parensCount++;
			if(tokens[end] == ")") parensCount--;
			++end;
		}
		if (end > 0) ret ~= parseSelector(tokens[0..end], caseSensitiveTags);
		if (tokens.length-end < 2) break;
		tokens = tokens[end+1..$];
	}
	return ret;
}

/++
	Builds a [SelectorComponent] out of an already lexed, comma-free token list
	using a small state machine.

	Params:
		tokens = the lexed tokens of one selector
		caseSensitiveTags = if false, tag name tokens are lower-cased before being stored

	Throws: Exception("parse error") if a functional pseudo-class such as
	`:not` or `:nth-child` is not followed by an opening parenthesis.
+/
SelectorComponent parseSelector(string[] tokens, bool caseSensitiveTags = true) {
	SelectorComponent s;

	SelectorPart current;
	// appends the part being built to the result and starts a fresh one
	void commit() {
		// might as well skip null items
		if(!current.isCleanSlateExceptSeparation()) {
			s.parts ~= current;
			current = current.init; // start right over
		}
	}
	enum State {
		Starting,
		ReadingClass,
		ReadingId,
		ReadingAttributeSelector,
		ReadingAttributeComparison,
		ExpectingAttributeCloser,
		ReadingPseudoClass,
		ReadingAttributeValue,

		SkippingFunctionalSelector,
	}
	State state = State.Starting;
	string attributeName, attributeValue, attributeComparison;
	int parensCount;
	foreach(idx, token; tokens) {
		// returns the concatenated tokens between the parentheses that follow
		// the current token, without consuming them; the caller switches to
		// SkippingFunctionalSelector so the main loop steps over them
		string readFunctionalSelector() {
			string s;
			// check the bound first so a trailing ":not" reports a parse error
			// instead of running off the end of the array
			if(idx + 1 >= tokens.length || tokens[idx + 1] != "(")
				throw new Exception("parse error");
			int pc = 1;
			foreach(t; tokens[idx + 2 .. $]) {
				if(t == "(")
					pc++;
				if(t == ")")
					pc--;
				if(pc == 0)
					break;
				s ~= t;
			}

			return s;
		}

		// index of this token among the known selector operators, or -1
		sizediff_t tid = -1;
		foreach(i, item; selectorTokens)
			if(token == item) {
				tid = i;
				break;
			}
		final switch(state) {
			case State.Starting: // fresh, might be reading an operator or a tagname
				if(tid == -1) {
					if(!caseSensitiveTags)
						token = token.toLower();

					if(current.isCleanSlateExceptSeparation()) {
						current.tagNameFilter = token;
						// default thing, see comment under "*" below
						if(current.separation == -1) current.separation = 0;
					} else {
						// if it was already set, we must see two thingies
						// separated by whitespace...
						commit();
						current.separation = 0; // tree
						current.tagNameFilter = token;
					}
				} else {
					// Selector operators
					switch(token) {
						case "*":
							current.tagNameFilter = "*";
							// the idea here is if we haven't actually set a separation
							// yet (e.g. the > operator), it should assume the generic
							// whitespace (descendant) mode to avoid matching self with -1
							if(current.separation == -1) current.separation = 0;
						break;
						case " ":
							// If some other separation has already been set,
							// this is irrelevant whitespace, so we should skip it.
							// this happens in the case of "foo > bar" for example.
							if(current.isCleanSlateExceptSeparation() && current.separation > 0)
								continue;
							commit();
							current.separation = 0; // tree
						break;
						case ">>":
							commit();
							current.separation = 0; // alternate syntax for tree from html5 css
						break;
						case ">":
							commit();
							current.separation = 1; // child
						break;
						case "+":
							commit();
							current.separation = 2; // sibling directly after
						break;
						case "~":
							commit();
							current.separation = 3; // any sibling after
						break;
						case "<":
							commit();
							current.separation = 4; // immediate parent of
						break;
						case "[":
							state = State.ReadingAttributeSelector;
							if(current.separation == -1) current.separation = 0;
						break;
						case ".":
							state = State.ReadingClass;
							if(current.separation == -1) current.separation = 0;
						break;
						case "#":
							state = State.ReadingId;
							if(current.separation == -1) current.separation = 0;
						break;
						case ":":
						case "::":
							state = State.ReadingPseudoClass;
							if(current.separation == -1) current.separation = 0;
						break;

						default:
							assert(0, token);
					}
				}
			break;
			case State.ReadingClass:
				current.attributesIncludesSeparatedBySpaces ~= ["class", token];
				state = State.Starting;
			break;
			case State.ReadingId:
				current.attributesEqual ~= ["id", token];
				state = State.Starting;
			break;
			case State.ReadingPseudoClass:
				switch(token) {
					case "first-of-type":
						current.firstOfType = true;
					break;
					case "last-of-type":
						current.lastOfType = true;
					break;
					case "only-of-type":
						current.firstOfType = true;
						current.lastOfType = true;
					break;
					case "first-child":
						current.firstChild = true;
					break;
					case "last-child":
						current.lastChild = true;
					break;
					case "only-child":
						current.firstChild = true;
						current.lastChild = true;
					break;
					case "scope":
						current.scopeElement = true;
					break;
					case "empty":
						// one with no children
						current.emptyElement = true;
					break;
					case "whitespace-only":
						current.whitespaceOnly = true;
					break;
					case "link":
						current.attributesPresent ~= "href";
					break;
					case "root":
						current.rootElement = true;
					break;
					case "nth-child":
						current.nthChild ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-of-type":
						current.nthOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-last-of-type":
						current.nthLastOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "is":
						state = State.SkippingFunctionalSelector;
						current.isSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "where":
						state = State.SkippingFunctionalSelector;
						current.whereSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "not":
						state = State.SkippingFunctionalSelector;
						current.notSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "has":
						state = State.SkippingFunctionalSelector;
						current.hasSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					// back to standards though not quite right lol
					case "disabled":
						current.attributesPresent ~= "disabled";
					break;
					case "checked":
						current.attributesPresent ~= "checked";
					break;

					case "visited", "active", "hover", "target", "focus", "selected":
						current.attributesPresent ~= "nothing";
						// FIXME
					/+
					// extensions not implemented
					//case "text": // takes the text in the element and wraps it in an element, returning it
					+/
						goto case;
					case "before", "after":
						current.attributesPresent ~= "FIXME";

					break;
					// My extensions
					case "odd-child":
						current.oddChild = true;
					break;
					case "even-child":
						current.evenChild = true;
					break;
					default:
						//if(token.indexOf("lang") == -1)
						//assert(0, token);
					break;
				}
				state = State.Starting;
			break;
			case State.SkippingFunctionalSelector:
				// step over the parenthesized argument readFunctionalSelector already read
				if(token == "(") {
					parensCount++;
				} else if(token == ")") {
					parensCount--;
				}

				if(parensCount == 0)
					state = State.Starting;
			break;
			case State.ReadingAttributeSelector:
				attributeName = token;
				attributeComparison = null;
				attributeValue = null;
				state = State.ReadingAttributeComparison;
			break;
			case State.ReadingAttributeComparison:
				// FIXME: these things really should be quotable in the proper lexer...
				if(token != "]") {
					if(token.indexOf("=") == -1) {
						// not a comparison; consider it
						// part of the attribute name. (It has to go on the
						// name: the value is overwritten once the comparison
						// has been read, which would silently drop it.)
						attributeName ~= token;
					} else {
						attributeComparison = token;
						state = State.ReadingAttributeValue;
					}
					break;
				}
				goto case;
			case State.ExpectingAttributeCloser:
				if(token != "]") {
					// not the closer; consider it part of comparison
					if(attributeComparison == "")
						attributeName ~= token;
					else
						attributeValue ~= token;
					break;
				}

				// Selector operators
				switch(attributeComparison) {
					default: assert(0);
					case "":
						current.attributesPresent ~= attributeName;
					break;
					case "=":
						current.attributesEqual ~= [attributeName, attributeValue];
					break;
					case "|=":
						current.attributesIncludesSeparatedByDashes ~= [attributeName, attributeValue];
					break;
					case "~=":
						current.attributesIncludesSeparatedBySpaces ~= [attributeName, attributeValue];
					break;
					case "$=":
						current.attributesEndsWith ~= [attributeName, attributeValue];
					break;
					case "^=":
						current.attributesStartsWith ~= [attributeName, attributeValue];
					break;
					case "*=":
						current.attributesInclude ~= [attributeName, attributeValue];
					break;
					case "!=":
						current.attributesNotEqual ~= [attributeName, attributeValue];
					break;
				}

				state = State.Starting;
			break;
			case State.ReadingAttributeValue:
				attributeValue = token;
				state = State.ExpectingAttributeCloser;
			break;
		}
	}

	commit();

	return s;
}

///.
Element[] removeDuplicates(Element[] input) {
	Element[] ret;

	// set of elements already emitted; first occurrence wins, order is kept
	bool[Element] already;
	foreach(e; input) {
		if(e in already) continue;
		already[e] = true;
		ret ~= e;
	}

	return ret;
}

// done with CSS selector handling


// FIXME: use the better parser from html.d
/// This is probably not useful to you unless you're writing a browser or something like that.
/// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes.
/// From here, you can start to make a layout engine for the box model and have a css aware browser.
class CssStyle {
	/++
		Params:
			rule = the selector text the declarations came from (empty for an inline style)
			content = the `name: value;` declarations, without the surrounding braces
	+/
	this(string rule, string content) {
		rule = rule.strip();
		content = content.strip();

		if(content.length == 0)
			return;

		originatingRule = rule;
		originatingSpecificity = getSpecificityOfRule(rule); // FIXME: if there's commas, this won't actually work!

		foreach(part; content.split(";")) {
			part = part.strip();
			if(part.length == 0)
				continue;
			auto idx = part.indexOf(":");
			if(idx == -1)
				continue;
				//throw new Exception("Bad css rule (no colon): " ~ part);

			Property p;

			p.name = part[0 .. idx].strip();
			p.value = part[idx + 1 .. $].replace("! important", "!important").replace("!important", "").strip(); // FIXME don't drop important
			p.givenExplicitly = true;
			p.specificity = originatingSpecificity;

			properties ~= p;
		}

		foreach(property; properties)
			expandShortForm(property, originatingSpecificity);
	}

	/// Currently a stub: always returns a zeroed Specificity.
	Specificity getSpecificityOfRule(string rule) {
		Specificity s;
		if(rule.length == 0) { // inline
		//	s.important = 2;
		} else {
			// FIXME
		}

		return s;
	}

	string originatingRule; ///.
	Specificity originatingSpecificity; ///.

	///.
	union Specificity {
		uint score; ///.
		// version(little_endian)
		///.
		struct {
			ubyte tags; ///.
			ubyte classes; ///.
			ubyte ids; ///.
			ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important
		}
	}

	///.
	struct Property {
		bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left"
		string name; ///.
		string value; ///.
		Specificity specificity; ///.
		// do we care about the original source rule?
	}

	///.
	Property[] properties;

	/// Property access by camelCase name: with no argument it reads, with an argument it writes at inline specificity.
	string opDispatch(string nameGiven)(string value = null) if(nameGiven != "popFront") {
		string name = unCamelCase(nameGiven);
		if(value is null)
			return getValue(name);
		else {
			// Specificity is a union, so an integer literal cannot be passed directly
			Specificity inlineSpecificity;
			inlineSpecificity.score = 0x02000000; /* inline specificity */
			return setValue(name, value, inlineSpecificity);
		}
	}

	/// takes dash style name
	string getValue(string name) {
		foreach(property; properties)
			if(property.name == name)
				return property.value;
		return null;
	}

	/++
		takes dash style name

		Sets `name` to `value` unless the property already exists with a
		higher specificity score, then expands shorthands (see [expandShortForm]).

		Params:
			name = dash style property name
			value = new value; a trailing `!important` is stripped and raises the specificity
			newSpecificity = specificity of the declaration being applied
			explicit = false when the value was inferred from a shorthand

		Returns: the value with `!important` removed.
	+/
	string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) {
		value = value.replace("! important", "!important");
		if(value.indexOf("!important") != -1) {
			newSpecificity.important = 1; // FIXME
			value = value.replace("!important", "").strip();
		}

		foreach(ref property; properties)
			if(property.name == name) {
				if(newSpecificity.score >= property.specificity.score) {
					// write everything through the ref BEFORE expanding: the
					// expansion appends to properties, which can reallocate the
					// array and leave this ref pointing at the old copy. It also
					// has to see the new value, not the stale one.
					property.givenExplicitly = explicit;
					property.value = value;
					property.specificity = newSpecificity;
					expandShortForm(property, newSpecificity);
					return value;
				} else {
					if(name == "display")
					{}//writeln("Not setting ", name, " to ", value, " because ", newSpecificity.score, " < ", property.specificity.score);
					return value; // do nothing - the specificity is too low
				}
			}

		// it's not here...

		Property p;
		p.givenExplicitly = explicit; // inferred longhands must stay out of toString
		p.name = name;
		p.value = value;
		p.specificity = newSpecificity;

		properties ~= p;
		expandShortForm(p, newSpecificity);

		return value;
	}

	// expands a 1 to 4 value box shorthand into its -top/-right/-bottom/-left longhands
	private void expandQuadShort(string name, string value, Specificity specificity) {
		// drop the empty pieces that repeated spaces produce
		string[] parts;
		foreach(piece; value.split(" "))
			if(piece.length)
				parts ~= piece;

		switch(parts.length) {
			case 1:
				setValue(name ~"-left", parts[0], specificity, false);
				setValue(name ~"-right", parts[0], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 2:
				setValue(name ~"-left", parts[1], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 3:
				// top, left-and-right, bottom
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[1], specificity, false);

			break;
			case 4:
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[3], specificity, false);
			break;
			default:
				// empty or more than four values: malformed input, nothing to expand
			break;
		}
	}

	/// Sets the per-side longhands implied by `margin`, `padding`, `border` and `outline`; other names are left alone.
	void expandShortForm(Property p, Specificity specificity) {
		switch(p.name) {
			case "margin":
			case "padding":
				expandQuadShort(p.name, p.value, specificity);
			break;
			case "border":
			case "outline":
				setValue(p.name ~ "-left", p.value, specificity, false);
				setValue(p.name ~ "-right", p.value, specificity, false);
				setValue(p.name ~ "-top", p.value, specificity, false);
				setValue(p.name ~ "-bottom", p.value, specificity, false);
			break;

			case "border-top":
			case "border-bottom":
			case "border-left":
			case "border-right":
			case "outline-top":
			case "outline-bottom":
			case "outline-left":
			case "outline-right":

			default: {}
		}
	}

	/// Serializes the explicitly given properties, wrapped in `rule { }` if there is an originating rule.
	override string toString() {
		string ret;
		if(originatingRule.length)
			ret = originatingRule ~ " {";

		foreach(property; properties) {
			if(!property.givenExplicitly)
				continue; // skip the inferred shit

			if(originatingRule.length)
				ret ~= "\n\t";
			else
				ret ~= " ";

			ret ~= property.name ~ ": " ~ property.value ~ ";";
		}

		if(originatingRule.length)
			ret ~= "\n}\n";

		return ret;
	}
}

/// Wraps `url` in `url("...")`. The url is inserted as-is, without escaping.
string cssUrl(string url) {
	return "url(\"" ~ url ~ "\")";
}

/// This probably isn't useful, unless you're writing a browser or something like that.
/// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css
/// as text.
///
/// The idea, however, is to represent a kind of CSS object model, complete with specificity,
/// that you can apply to your documents to build the complete computedStyle object.
class StyleSheet {
	///.
	CssStyle[] rules;

	/// Splits `source` into rules, skipping comments and @ rules, and stores one [CssStyle] per rule.
	this(string source) {
		// FIXME: handle @ rules and probably could improve lexer
		// add nesting?
		int state;
		string currentRule;
		string currentValue;

		// which of the two buffers characters are currently appended to
		string* currentThing = &currentRule;
		foreach(c; source) {
			switch(state) {
				default: assert(0);
				case 0: // starting - we assume we're reading a rule
					switch(c) {
						case '@':
							state = 4;
						break;
						case '/':
							state = 1;
						break;
						case '{':
							currentThing = &currentValue;
						break;
						case '}':
							if(currentThing is &currentValue) {
								rules ~= new CssStyle(currentRule, currentValue);

								currentRule = "";
								currentValue = "";

								currentThing = &currentRule;
							} else {
								// idk what is going on here.
								// check sveit.com to reproduce
								currentRule = "";
								currentValue = "";
							}
						break;
						default:
							(*currentThing) ~= c;
					}
				break;
				case 1: // expecting *
					if(c == '*')
						state = 2;
					else {
						state = 0;
						(*currentThing) ~= "/" ~ c;
					}
				break;
				case 2: // inside comment
					if(c == '*')
						state = 3;
				break;
				case 3: // expecting / to end comment
					if(c == '/')
						state = 0;
					else if(c == '*')
						state = 3; // another star: the comment may still end on the next char ("**/")
					else
						state = 2; // it's just a comment so no need to append
				break;
				case 4:
					if(c == '{')
						state = 5;
					if(c == ';')
						state = 0; // just skipping import
				break;
				case 5:
					if(c == '}')
						state = 0; // skipping font face probably
				break;
			}
		}
	}

	/// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call
	void apply(Document document) {
		foreach(rule; rules) {
			if(rule.originatingRule.length == 0)
				continue; // this shouldn't happen here in a stylesheet
			foreach(element; document.querySelectorAll(rule.originatingRule)) {
				// note: this should be a different object than the inline style
				// since givenExplicitly is likely destroyed here
				auto current = element.computedStyle;

				foreach(item; rule.properties)
					current.setValue(item.name, item.value, item.specificity);
			}
		}
	}
}


/// This is kinda private; just a little utility container for use by the ElementStream class.
final class Stack(T) {
	this() {
		internalLength = 0;
		arr = initialBuffer[];
	}

	/// Adds `t` on top, moving to a larger heap buffer when the current one is full.
	void push(T t) {
		if(internalLength >= arr.length) {
			auto oldarr = arr;
			// double while small, then grow linearly
			if(arr.length < 4096)
				arr = new T[arr.length * 2];
			else
				arr = new T[arr.length + 4096];
			arr[0 .. oldarr.length] = oldarr[];
		}

		arr[internalLength] = t;
		internalLength++;
	}

	/// Removes and returns the top item. The stack must not be empty.
	T pop() {
		assert(internalLength);
		internalLength--;
		return arr[internalLength];
	}

	/// Returns the top item without removing it. The stack must not be empty.
	T peek() {
		assert(internalLength);
		return arr[internalLength - 1];
	}

	///.
	@property bool empty() {
		return internalLength ? false : true;
	}

	///.
	private T[] arr;
	private size_t internalLength;
	private T[64] initialBuffer;
	// the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep),
	// using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push()
	// function thanks to this, and push() was actually one of the slowest individual functions in the code!
}

/// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively.
final class ElementStream {

	///.
	@property Element front() {
		return current.element;
	}

	/// Use Element.tree instead.
	this(Element start) {
		current.element = start;
		current.childPosition = -1;
		isEmpty = false;
		stack = new Stack!(Current);
	}

	/*
		Handle it
		handle its children

	*/

	/// Advances to the next element: the first unvisited child if there is one, otherwise back up through the parents.
	void popFront() {
	    more:
	    	if(isEmpty) return;

		// FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times)

		current.childPosition++;
		if(current.childPosition >= current.element.children.length) {
			if(stack.empty())
				isEmpty = true;
			else {
				current = stack.pop();
				goto more;
			}
		} else {
			stack.push(current);
			current.element = current.element.children[current.childPosition];
			current.childPosition = -1;
		}
	}

	/// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable.
	void currentKilled() {
		if(stack.empty) // should never happen
			isEmpty = true;
		else {
			current = stack.pop();
			current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right
		}
	}

	///.
	@property bool empty() {
		return isEmpty;
	}

	private:

	// an element plus the index of the child most recently descended into (-1 = none yet)
	struct Current {
		Element element;
		int childPosition;
	}

	Current current;

	Stack!(Current) stack;

	bool isEmpty;
}



// unbelievable.
// Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time.
/// Returns the offset of the first occurrence of `needle` in `haystack`, or -1 when `std.algorithm.find` comes back empty.
sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) {
	static import std.algorithm;
	auto found = std.algorithm.find(haystack, needle);
	if(found.length == 0)
		return -1;
	// find returns the tail of haystack starting at the match, so the difference in lengths is the offset
	return haystack.length - found.length;
}

/// Returns a newly allocated array holding `arr[0 .. position + 1]`, then `what`, then the rest of `arr`. `arr` itself is not modified.
private T[] insertAfter(T)(T[] arr, int position, T[] what) {
	assert(position < arr.length);
	// concatenation always allocates, so the result never aliases arr or what
	return arr[0 .. position + 1] ~ what ~ arr[position + 1 .. $];
}

/// Linear search: true if any element of `arr` compares equal to `item`.
package bool isInArray(T)(T item, T[] arr) {
	foreach(i; arr)
		if(item == i)
			return true;
	return false;
}

/// Copies every key/value pair of `arr` into a fresh associative array and returns it.
private string[string] aadup(in string[string] arr) {
	string[string] ret;
	foreach(k, v; arr)
		ret[k] = v;
	return ret;
}

// dom event support, if you want to use it

/// used for DOM events
version(dom_with_events)
alias EventHandler = void delegate(Element handlerAttachedTo, Event event);

/// This is a DOM event, like in javascript. Note that this library never fires events - it is only here for you to use if you want it.
version(dom_with_events)
class Event {
	/// Creates an event named `eventName` whose source element is `target`.
	this(string eventName, Element target) {
		this.eventName = eventName;
		this.srcElement = target;
	}

	/// Prevents the default event handler (if there is one) from being called
	void preventDefault() {
		defaultPrevented = true;
	}

	/// Stops the event propagation immediately.
	void stopPropagation() {
		propagationStopped = true;
	}

	bool defaultPrevented; /// set by [preventDefault]
	bool propagationStopped; /// set by [stopPropagation]
	string eventName; /// the key used to look up handlers

	Element srcElement; /// the element the event was created for
	alias srcElement target;

	Element relatedTarget;

	int clientX;
	int clientY;

	int button;

	bool isBubbling; /// false while [dispatch] runs capturing handlers, true from the bubbling pass onward

	/// this sends it only to the target. If you want propagation, use dispatch() instead.
	void send() {
		if(srcElement is null)
			return;

		auto e = srcElement;

		if(eventName in e.bubblingEventHandlers)
			foreach(handler; e.bubblingEventHandlers[eventName])
				handler(e, this);

		if(!defaultPrevented)
			if(eventName in e.defaultEventHandlers)
				e.defaultEventHandlers[eventName](e, this);
	}

	/// this dispatches the event using the capture -> target -> bubble process
	void dispatch() {
		if(srcElement is null)
			return;

		// first capture, then bubble

		// chain[0] is the source element; each following entry is the previous one's parentNode
		Element[] chain;
		Element curr = srcElement;
		while(curr) {
			chain ~= curr;
			curr = curr.parentNode;
		}

		isBubbling = false;

		// capture pass: walk the chain in reverse, i.e. from the outermost ancestor toward the source
		foreach(e; chain.retro()) {
			if(eventName in e.capturingEventHandlers)
				foreach(handler; e.capturingEventHandlers[eventName])
					handler(e, this);

			// the default on capture should really be to always do nothing

			//if(!defaultPrevented)
			//	if(eventName in e.defaultEventHandlers)
			//		e.defaultEventHandlers[eventName](e.element, this);

			if(propagationStopped)
				break;
		}

		isBubbling = true;
		// bubble pass: from the source outward, skipped entirely if capture stopped propagation
		if(!propagationStopped)
			foreach(e; chain) {
				if(eventName in e.bubblingEventHandlers)
					foreach(handler; e.bubblingEventHandlers[eventName])
						handler(e, this);

				if(propagationStopped)
					break;
			}

		// default handlers run on every element of the chain; only preventDefault suppresses them
		if(!defaultPrevented)
			foreach(e; chain) {
				if(eventName in e.defaultEventHandlers)
					e.defaultEventHandlers[eventName](e, this);
			}
	}
}

/// Validation and display options for a form field, convertible to and from an element's `required`, `pattern` and `placeholder` attributes.
struct FormFieldOptions {
	// usable for any

	/// this is a regex pattern used to validate the field
	string pattern;
	/// must the field be filled in? Even with a regex, it can be submitted blank if this is false.
	bool isRequired;
	/// this is displayed as an example to the user
	string placeholder;

	// usable for numeric ones


	// convenience methods to quickly get some options

	/// Default-initialized options: no pattern, not required, no placeholder.
	@property static FormFieldOptions none() {
		FormFieldOptions f;
		return f;
	}

	/// Options with only `isRequired` set.
	static FormFieldOptions required() {
		FormFieldOptions f;
		f.isRequired = true;
		return f;
	}

	/// Options with the given `pattern`, and `isRequired` set from `required`.
	static FormFieldOptions regex(string pattern, bool required = false) {
		FormFieldOptions f;
		f.pattern = pattern;
		f.isRequired = required;
		return f;
	}

	/// Builds options from the `required`, `pattern` and `placeholder` attributes present on `e`.
	static FormFieldOptions fromElement(Element e) {
		FormFieldOptions f;
		if(e.hasAttribute("required"))
			f.isRequired = true;
		if(e.hasAttribute("pattern"))
			f.pattern = e.pattern;
		if(e.hasAttribute("placeholder"))
			f.placeholder = e.placeholder;
		return f;
	}

	/// Writes the options that are set onto `e` and returns `e`. Options that are unset (false / empty) leave the corresponding attribute untouched.
	Element applyToElement(Element e) {
		if(this.isRequired)
			e.required = "required";
		if(this.pattern.length)
			e.pattern = this.pattern;
		if(this.placeholder.length)
			e.placeholder = this.placeholder;
		return e;
	}
}

// this needs to look just like a string, but can expand as needed
version(no_dom_stream)
alias string Utf8Stream;
else
class Utf8Stream {
	protected:
		// these two should be overridden in subclasses to actually do the stream magic

		// returns the next chunk of data from the helper delegate, or null if there is none
		string getMore() {
			if(getMoreHelper !is null)
				return getMoreHelper();
			return null;
		}

		// asks the helper delegate whether more data is available; false if there is no helper
		bool hasMore() {
			if(hasMoreHelper !is null)
				return hasMoreHelper();
			return false;
		}
		// the rest should be ok

	public:
		/// Wraps a complete, already-loaded string.
		this(string d) {
			this.data = d;
		}

		/// Wraps a pull-style source. `hasMoreHelper` reports whether more data exists and `getMoreHelper` returns the next chunk. The first chunk, if any, is fetched immediately.
		this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) {
			this.getMoreHelper = getMoreHelper;
			this.hasMoreHelper = hasMoreHelper;

			if(hasMore())
				this.data ~= getMore();

			// a stdout.flush() debugging leftover used to sit here; a stream wrapper
			// has no business touching the process's standard output
		}

		/// Current buffered length. May first pull one more chunk from the source, see below.
		@property final size_t length() {
			// the parser checks length primarily directly before accessing the next character
			// so this is the place we'll hook to append more if possible and needed.
			if(lastIdx + 1 >= data.length && hasMore()) {
				data ~= getMore();
			}
			return data.length;
		}

		/// Returns the byte at `idx` and records it as the furthest position read so far.
		final char opIndex(size_t idx) {
			if(idx > lastIdx)
				lastIdx = idx;
			return data[idx];
		}

		/// Returns `data[start .. end]` and records `end` as the furthest position read so far.
		final string opSlice(size_t start, size_t end) {
			if(end > lastIdx)
				lastIdx = end;
			return data[start .. end];
		}

		/// `$` inside an index or slice expression; goes through [length], so it may pull more data.
		final size_t opDollar() {
			return length();
		}

		/// `stream ~ s`: note that this appends to the stream in place and returns the same object rather than a copy.
		final Utf8Stream opBinary(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		/// `stream ~= s`: appends to the buffered data.
		final Utf8Stream opOpAssign(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		/// `stream = s`: replaces the buffered data. `lastIdx` and the helper delegates are left as they were.
		final Utf8Stream opAssign(string rhs) {
			this.data = rhs;
			return this;
		}
	private:
		string data;

		// highest index (or slice end) handed out so far; length() uses it to decide when to fetch more
		size_t lastIdx;

		bool delegate() hasMoreHelper;
		string delegate() getMoreHelper;


		/+
		// used to maybe clear some old stuff
		// you might have to remove elements parsed with it too since they can hold slices into the
		// old stuff, preventing gc
		void dropFront(int bytes) {
			posAdjustment += bytes;
			data = data[bytes .. $];
		}

		int posAdjustment;
		+/
}

/// Forwards each key/value pair that `arsd.database.fillData` produces for `obj` and `name` to `form.setValue`.
void fillForm(T)(Form form, T obj, string name) {
	import arsd.database;
	fillData((k, v) => form.setValue(k, v), obj, name);
}


/+
/+
	Syntax:

	Tag: tagname#id.class
	Tree: Tag(Children, comma, separated...)
	Children: Tree or Variable
	Variable: $varname with optional |funcname following.

	If a variable has a tree after it, it breaks the variable down:
		* if array, foreach it does the tree
		* if struct, it breaks down the member variables

	stolen from georgy on irc, see: https://github.com/georgy7/stringplate
+/
struct Stringplate {
	/++

	+/
	this(string s) {

	}

	/++

	+/
	Element expand(T...)(T vars) {
		return null;
	}
}
///
unittest {
	auto stringplate = Stringplate("#bar(.foo($foo), .baz($baz))");
	assert(stringplate.expand.innerHTML == `<div id="bar"><div class="foo">$foo</div><div class="baz">$baz</div></div>`);
}
+/

/// Returns true when every node in `children` is either a text node with non-whitespace content, or a node whose `tagName` is in `inlineElements` and whose own children pass the same test recursively. An empty list yields true.
bool allAreInlineHtml(const(Element)[] children, const string[] inlineElements) {
	foreach(child; children) {
		if(child.nodeType == NodeType.Text && child.nodeValue.strip.length) {
			// cool
		} else if(child.tagName.isInArray(inlineElements) && allAreInlineHtml(child.children, inlineElements)) {
			// cool
		} else {
			// prolly block
			// (a whitespace-only text node also reaches the tag-name test above, since the first branch requires content)
			return false;
		}
	}
	return true;
}

/// True for space, carriage return, line feed and tab only.
private bool isSimpleWhite(dchar c) {
	return c == ' ' || c == '\r' || c == '\n' || c == '\t';
}

unittest {
	// Test for issue #120
	string s = `<html>
	<body>
		<P>AN
		<P>bubbles</P>
		<P>giggles</P>
	</body>
</html>`;
	auto doc = new Document();
	doc.parseUtf8(s, false, false);
	auto s2 = doc.toString();
	assert(
			s2.indexOf("bubbles") < s2.indexOf("giggles"),
			"paragraph order incorrect:\n" ~ s2);
}

unittest {
	// test for suncarpet email dec 24 2019
	// arbitrary id asduiwh
	auto document = new Document("<html>
        <head>
                <meta charset=\"utf-8\"></meta>
                <title>Element.querySelector Test</title>
        </head>
        <body>
                <div id=\"foo\">
                        <div>Foo</div>
                        <div>Bar</div>
                </div>
        </body>
</html>");

	auto doc = document;

	assert(doc.querySelectorAll("div div").length == 2);
	assert(doc.querySelector("div").querySelectorAll("div").length == 2);
	assert(doc.querySelectorAll("> html").length == 0);
	assert(doc.querySelector("head").querySelectorAll("> title").length == 1);
	assert(doc.querySelector("head").querySelectorAll("> meta[charset]").length == 1);


	assert(doc.root.matches("html"));
	assert(!doc.root.matches("nothtml"));
	assert(doc.querySelector("#foo > div").matches("div"));
	assert(doc.querySelector("body > #foo").matches("#foo"));

	assert(doc.root.querySelectorAll(":root > body").length == 0); // the root has no CHILD root!
	assert(doc.querySelectorAll(":root > body").length == 1); // but the DOCUMENT does
	assert(doc.querySelectorAll(" > body").length == 1); //  should mean the same thing
	assert(doc.root.querySelectorAll(" > body").length == 1); // the root of HTML has this
	assert(doc.root.querySelectorAll(" > html").length == 0); // but not this

	// also confirming the querySelector works via the mdn definition
	auto foo = doc.requireSelector("#foo");
	assert(foo.querySelector("#foo > div") !is null);
	assert(foo.querySelector("body #foo > div") !is null);

	// this is SUPPOSED to work according to the spec but never has in dom.d since it limits the scope.
	// the new css :scope thing is designed to bring this in. and meh idk if i even care.
	//assert(foo.querySelectorAll("#foo > div").length == 2);
}

unittest {
	// based on https://developer.mozilla.org/en-US/docs/Web/API/Element/closest example
	auto document = new Document(`<article>
  <div id="div-01">Here is div-01
    <div id="div-02">Here is div-02
      <div id="div-03">Here is div-03</div>
    </div>
  </div>
</article>`, true, true);

	auto el = document.getElementById("div-03");
	assert(el.closest("#div-02").id == "div-02");
	assert(el.closest("div div").id == "div-03");
	assert(el.closest("article > div").id == "div-01");
	assert(el.closest(":not(div)").tagName == "article");

	assert(el.closest("p") is null);
	assert(el.closest("p, div") is el);
}

unittest {
	// https://developer.mozilla.org/en-US/docs/Web/CSS/:is
	auto document = new Document(`<test>
		<div class="foo"><p>cool</p><span>bar</span></div>
		<main><p>two</p></main>
	</test>`);

	assert(document.querySelectorAll(":is(.foo, main) p").length == 2);
	assert(document.querySelector("div:where(.foo)") !is null);
}

unittest {
immutable string html = q{
<root>
<div class="roundedbox">
 <table>
  <caption class="boxheader">Recent Reviews</caption>
  <tr>
   <th>Game</th>
   <th>User</th>
   <th>Rating</th>
   <th>Created</th>
  </tr>

  <tr>
   <td>June 13, 2020 15:10</td>
   <td><a href="/reviews/8833">[Show]</a></td>
  </tr>

  <tr>
   <td>June 13, 2020 15:02</td>
   <td><a href="/reviews/8832">[Show]</a></td>
  </tr>

  <tr>
   <td>June 13, 2020 14:41</td>
   <td><a href="/reviews/8831">[Show]</a></td>
  </tr>
 </table>
</div>
</root>
};

	auto doc = new Document(cast(string)html);
	// this should select the second table row, but...
	auto rd = doc.root.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`);
	assert(rd !is null);
	assert(rd.href == "/reviews/8832");

	rd = doc.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`);
	assert(rd !is null);
	assert(rd.href == "/reviews/8832");
}

unittest {
	try {
		auto doc = new XmlDocument("<testxmlns:foo=\"/\"></test>");
		assert(0);
	} catch(Exception e) {
		// good; it should throw an exception, not an error.
	}
}

/*
Copyright: Adam D. Ruppe, 2010 - 2021
License:   <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
Authors: Adam D. Ruppe, with contributions by Nick Sabalausky, Trass3r, and ketmar among others

        Copyright Adam D. Ruppe 2010-2021.
Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
        http://www.boost.org/LICENSE_1_0.txt)
*/