// FIXME: add classList. it is a live list and removes whitespace and duplicates when you use it.
// FIXME: xml namespace support???
// FIXME: https://developer.mozilla.org/en-US/docs/Web/API/Element/insertAdjacentHTML
// FIXME: parentElement is parentNode that skips DocumentFragment etc but will be hard to work in with my compatibility...

// FIXME: the scriptable list is quite arbitrary

// FIXME: https://developer.mozilla.org/en-US/docs/Web/CSS/:is


// xml entity references?!

/++
	This is an html DOM implementation, started with cloning
	what the browser offers in Javascript, but going well beyond
	it in convenience.

	If you can do it in Javascript, you can probably do it with
	this module, and much more.

	---
	import arsd.dom;

	void main() {
		auto document = new Document("<html><p>paragraph</p></html>");
		writeln(document.querySelector("p"));
		document.root.innerHTML = "<p>hey</p>";
		writeln(document);
	}
	---

	BTW: this file optionally depends on `arsd.characterencodings`, to
	help it correctly read files from the internet. You should be able to
	get characterencodings.d from the same place you got this file.

	If you want it to stand alone, just always use the `Document.parseUtf8`
	function or the constructor that takes a string.

	Symbol_groups:

	core_functionality =

	These members provide core functionality. The members on these classes
	will provide most of your direct interaction.

	bonus_functionality =

	These provide additional functionality for special use cases.

	implementations =

	These provide implementations of other functionality.
+/
module arsd.dom;

// FIXME: support the css standard namespace thing in the selectors too

version(with_arsd_jsvar)
	import arsd.jsvar;
else {
	enum scriptable = "arsd_jsvar_compatible";
}

// Compile-time filter for opDispatch: answers whether `name` is one of the
// attributes that may be accessed directly, without going through .attr.
// Matching is exact and case-sensitive.
bool isConvenientAttribute(string name) {
	switch(name) {
		case "name", "id", "href", "value",
		     "checked", "selected", "type",
		     "src", "content", "pattern",
		     "placeholder", "required", "alt",
		     "rel",
		     "method", "action", "enctype":
			return true;
		default:
			return false;
	}
}


// FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode
// FIXME: failing to close a paragraph sometimes messes things up too

// FIXME: it would be kinda cool to have some support for internal DTDs
// and maybe XPath as well, to some extent
/*
	we could do
	meh this sux

	auto xpath = XPath(element);

	// get the first p
	xpath.p[0].a["href"]
*/


/// The main document interface, including a html parser.
/// Group: core_functionality
class Document : FileResource {
	/// Convenience method for web scraping. Requires [arsd.http2] to be
	/// included in the build as well as [arsd.characterencodings].
	///
	/// Performs a GET on `url` and parses the response body. With
	/// `strictMode` the body is parsed case-sensitively and strictly, using
	/// the charset reported by the response; otherwise it goes through
	/// [parseGarbage], which works the encoding out from the data.
	static Document fromUrl()(string url, bool strictMode = false) {
		import arsd.http2;

		auto client = new HttpClient();
		auto request = client.navigateTo(Uri(url), HttpVerb.GET);
		auto response = request.waitForCompletion();
		auto markup = cast(string) response.content;

		auto result = new Document();
		if(!strictMode)
			result.parseGarbage(markup);
		else
			result.parse(markup, true, true, response.contentTypeCharset);

		return result;
	}

	///.
/// Builds a document by handing `data` to [parseUtf8] with the given flags.
	this(string data, bool caseSensitive = false, bool strict = false) {
		parseUtf8(data, caseSensitive, strict);
	}

	/**
		Creates an empty document. It has *nothing* in it at all.
	*/
	this() {

	}

	/// This is just something I'm toying with. Right now, you use opIndex to put in css selectors.
	/// It returns a struct that forwards calls to all elements it holds, and returns itself so you
	/// can chain it.
	///
	/// Example: document["p"].innerText("hello").addClass("modified");
	///
	/// Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClass("modified"); }
	///
	/// Note: always use function calls (not property syntax) and don't use toString in there for best results.
	///
	/// You can also do things like: document["p"]["b"] though tbh I'm not sure why since the selector string can do all that anyway. Maybe
	/// you could put in some kind of custom filter function tho.
	ElementCollection opIndex(string selector) {
		// start from a collection holding just the root, then narrow it by selector
		return ElementCollection(this.root)[selector];
	}

	string _contentType = "text/html; charset=utf-8";

	/// If you're using this for some other kind of XML, you can
	/// set the content type here.
	///
	/// Note: this has no impact on the function of this class.
	/// It is only used if the document is sent via a protocol like HTTP.
	///
	/// This may be called by parse() if it recognizes the data. Otherwise,
	/// if you don't set it, it assumes text/html; charset=utf-8.
	///
	/// Returns: the value that was just stored.
	@property string contentType(string mimeType) {
		return _contentType = mimeType;
	}

	/// implementing the FileResource interface, useful for sending via
	/// http automatically.
	override @property string contentType() const {
		return _contentType;
	}

	/// implementing the FileResource interface; it calls toString.
/// The returned bytes are the string from toString, cast (not copied) to immutable(ubyte)[].
	override immutable(ubyte)[] getData() const {
		auto serialized = this.toString();
		return cast(immutable(ubyte)[]) serialized;
	}


	// Not implemented yet: would concatenate any consecutive text nodes.
	// (Kept as a plain comment so it is not merged into the next member's documentation.)
	/*
	void normalize() {

	}
	*/

	/// This will set delegates for parseSaw* (note: this overwrites anything else you set, and you setting subsequently will overwrite this) that add those things to the dom tree when it sees them.
	/// Call this before calling parse().
	///
	/// Note this will also preserve the prolog and doctype from the original file, if there was one.
	void enableAddingSpecialTagsToDom() {
		// a single "always keep the node" callback is installed on every hook
		bool delegate(string) keepNode = (string unused) => true;

		parseSawComment = keepNode;
		parseSawAspCode = keepNode;
		parseSawPhpCode = keepNode;
		parseSawQuestionInstruction = keepNode;
		parseSawBangInstruction = keepNode;
	}

	/// If the parser sees a html comment, it will call this callback
	/// <!-- comment --> will call parseSawComment(" comment ")
	/// Return true if you want the node appended to the document.
	bool delegate(string) parseSawComment;

	/// If the parser sees <% asp code... %>, it will call this callback.
	/// It will be passed "% asp code... %" or "%= asp code .. %"
	/// Return true if you want the node appended to the document.
	bool delegate(string) parseSawAspCode;

	/// If the parser sees <?php php code... ?>, it will call this callback.
	/// It will be passed "?php php code... ?" or "?= php code .. ?"
	/// Note: dom.d cannot identify the other php <? code ?> short format.
	/// Return true if you want the node appended to the document.
	bool delegate(string) parseSawPhpCode;

	/// if it sees a <?xxx> that is not php or asp
	/// it calls this function with the contents.
	/// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo")
	/// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>.
/// Return true if you want the node appended to the document.
	bool delegate(string) parseSawQuestionInstruction;

	/// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment),
	/// it calls this function with the contents.
	/// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo")
	/// Return true if you want the node appended to the document.
	bool delegate(string) parseSawBangInstruction;

	/// Given the kind of garbage you find on the Internet, try to make sense of it.
	/// Equivalent to document.parse(data, false, false, null);
	/// (Case-insensitive, non-strict, determine character encoding from the data.)
	///
	/// NOTE: this makes no attempt at added security.
	///
	/// It is a template so it lazily imports characterencodings.
	void parseGarbage()(string data) {
		parse(data, false, false, null);
	}

	/// Parses well-formed UTF-8, case-sensitive, XML or XHTML
	/// Will throw exceptions on things like unclosed tags.
	void parseStrict(string data) {
		parseStream(toUtf8Stream(data), true, true);
	}

	/// Parses well-formed UTF-8 in loose mode (by default). Tries to correct
	/// tag soup, but does NOT try to correct bad character encodings.
	///
	/// Badly encoded input will still throw an exception.
	void parseUtf8(string data, bool caseSensitive = false, bool strict = false) {
		parseStream(toUtf8Stream(data), caseSensitive, strict);
	}

	// Works out the encoding of `rawdata` (unless `dataEncoding` already names
	// it), converts the bytes to UTF-8 and wraps the result in a Utf8Stream.
	//
	// Params:
	//   rawdata      = the document bytes exactly as received
	//   dataEncoding = encoding name, or null to detect it from the data
	//   strict       = when true, an undeterminable encoding throws
	//                  MarkupException and conversion errors propagate; when
	//                  false, the code falls back to "Windows 1252"
	//
	// this is a template so we get lazy import behavior
	Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) {
		import arsd.characterencodings;

		// A byte that cannot be part of an encoding label: either quote, the
		// end of the tag, a parameter separator, or whitespace.
		static bool endsEncodingName(ubyte b) {
			return b == '"' || b == '\'' || b == '>' || b == '/' || b == ';'
				|| b == ' ' || b == '\t' || b == '\r' || b == '\n';
		}

		// gotta determine the data encoding. If you know it, pass it in above to skip all this.
		if(dataEncoding is null) {
			dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata);
			// it can't tell... probably a random 8 bit encoding. Let's check the document itself.
			// Now, XML and HTML can both list encoding in the document, but we can't really parse
			// it here without changing a lot of code until we know the encoding. So I'm going to
			// do some hackish string checking.
			if(dataEncoding is null) {
				auto dataAsBytes = cast(immutable(ubyte)[]) rawdata;
				// first, look for an XML prolog
				auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\"");
				if(idx != -1) {
					idx += "encoding=\"".length;
					// we're probably past the prolog if it's this far in; we might be looking at
					// content. Forget about it.
					if(idx > 100)
						idx = -1;
				}
				// if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5)..
				if(idx == -1) {
					idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset=");
					if(idx != -1) {
						idx += "charset=".length;
						// the value may be quoted with either quote character, or not quoted at all;
						// "charset=" can also be the very last thing in the input, so check bounds first
						if(idx < dataAsBytes.length && (dataAsBytes[idx] == '"' || dataAsBytes[idx] == '\''))
							idx++;
					}
				}

				// found something in either branch...
				if(idx != -1) {
					// read till the name ends or about 12 chars, whichever comes first...
					auto end = idx;
					while(end < dataAsBytes.length && end - idx < 12 && !endsEncodingName(dataAsBytes[end]))
						end++;

					// an empty name (e.g. charset="") tells us nothing; leave it as unknown
					if(end > idx)
						dataEncoding = cast(string) dataAsBytes[idx .. end];
				}
				// otherwise, we just don't know.
			}
		}

		if(dataEncoding is null) {
			if(strict)
				throw new MarkupException("I couldn't figure out the encoding of this document.");
			else {
				// if we really don't know by here, it means we already tried UTF-8,
				// looked for utf 16 and 32 byte order marks, and looked for xml or meta
				// tags... let's assume it's Windows-1252, since that's probably the most
				// common aside from utf that wouldn't be labeled.
				dataEncoding = "Windows 1252";
			}
		}

		// and now, go ahead and convert it.

		string data;

		if(!strict) {
			// if we're in non-strict mode, we need to check
			// the document for mislabeling too; sometimes
			// web documents will say they are utf-8, but aren't
			// actually properly encoded. If it fails to validate,
			// we'll assume it's actually Windows encoding - the most
			// likely candidate for mislabeled garbage.
			dataEncoding = dataEncoding.toLower();
			dataEncoding = dataEncoding.replace(" ", "");
			dataEncoding = dataEncoding.replace("-", "");
			dataEncoding = dataEncoding.replace("_", "");
			if(dataEncoding == "utf8") {
				try {
					validate(rawdata);
				} catch(UTFException e) {
					dataEncoding = "Windows 1252";
				}
			}
		}

		// NOTE(review): after the normalization above the name is never the exact
		// string "UTF-8" in non-strict mode, so this shortcut is only taken in
		// strict mode; everything else goes through convertToUtf8.
		if(dataEncoding != "UTF-8") {
			if(strict)
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
			else {
				try {
					data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
				} catch(Exception e) {
					// unknown or broken label: last resort is the Windows encoding
					data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252");
				}
			}
		} else
			data = rawdata;

		return toUtf8Stream(data);
	}

	// Wraps already-UTF-8 text in whatever Utf8Stream is: the string itself
	// when Utf8Stream is an alias of string, a new Utf8Stream object otherwise.
	private
	Utf8Stream toUtf8Stream(in string rawdata) {
		string data = rawdata;
		static if(is(Utf8Stream == string))
			return data;
		else
			return new Utf8Stream(data);
	}

	/**
		Take XMLish data and try to make the DOM tree out of it.

		The goal isn't to be perfect, but to just be good enough to
		approximate Javascript's behavior.

		If strict, it throws on something that doesn't make sense.
		(Examples: mismatched tags. It doesn't validate!)
		If not strict, it tries to recover anyway, and only throws
		when something is REALLY unworkable.

		If strict is false, it uses a magic list of tags that needn't
		be closed. If you are writing a document specifically for this,
		try to avoid such - use self closed tags at least. Easier to parse.
The dataEncoding argument can be used to pass a specific
		charset encoding for automatic conversion. If null (which is NOT
		the default!), it tries to determine from the data itself,
		using the xml prolog or meta tags. If that fails too, strict mode
		throws and non-strict mode falls back to Windows-1252.

		If the encoding it ends up with is wrong, it can throw on non-ascii
		characters!


		Note that it previously assumed the data was encoded as UTF-8, which
		is why the dataEncoding argument defaults to that.

		So it shouldn't break backward compatibility.

		But, if you want the best behavior on wild data - figuring it out from the document
		instead of assuming - you'll probably want to change that argument to null.

		This is a template so it lazily imports arsd.characterencodings, which is required
		to fix up data encodings.

		If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the
		dependency. If it is data from the Internet though, a random website, the encoding
		is often a lie. This function, if dataEncoding == null, can correct for that, or
		you can try parseGarbage. In those cases, arsd.characterencodings is required to
		compile.
	*/
	void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") {
		// convert to UTF-8 first, then run the actual parser over the result
		parseStream(handleDataEncoding(rawdata, dataEncoding, strict), caseSensitive, strict);
	}

	// note: this works best in strict mode, unless data is just a simple string wrapper
	void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false) {
		// FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler
		// of my big app.

		assert(data !is null);

		// go through character by character.
		// if you see a <, consider it a tag.
// name goes until the first non tagname character
		// then see if it self closes or has an attribute

		// if not in a tag, anything not a tag is a big text
		// node child. It ends as soon as it sees a <

		// Whitespace in text or attributes is preserved, but not between attributes

		// & and friends are converted when I know them, left the same otherwise


		// this should already be done correctly.. so I'm leaving it off to net a ~10% speed boost on my typical test file (really)
		//validate(data); // it *must* be UTF-8 for this to work correctly

		sizediff_t pos = 0;

		clear();

		loose = !caseSensitive;

		bool sawImproperNesting = false;
		bool paragraphHackfixRequired = false;

		// 1-based line number of offset p: one plus the number of '\n' before it
		int getLineNumber(sizediff_t p) {
			int newlines = 0;
			foreach(ch; data[0..p]) {
				if(ch == '\n')
					newlines++;
			}
			return newlines + 1;
		}

		// always throws; the message is prefixed with the current offset and line
		void parseError(string message) {
			auto line = getLineNumber(pos);
			throw new MarkupException(format("char %d (line %d): %s", pos, line, message));
		}

		// advances pos over simple whitespace; reports whether it moved at all
		bool eatWhitespace() {
			auto before = pos;
			while(pos < data.length && data[pos].isSimpleWhite)
				pos++;
			return pos != before;
		}

		// reads a tag name starting at pos; lower-cased unless caseSensitive
		string readTagName() {
			// remember to include : for namespaces
			// basically just keep going until >, /, or whitespace
			auto start = pos;
			for(;;) {
				auto ch = data[pos];
				if(ch == '>' || ch == '/' || ch.isSimpleWhite)
					break;

				pos++;
				if(pos == data.length) {
					if(strict)
						throw new Exception("tag name incomplete when file ended");
					break;
				}
			}

			if(caseSensitive)
				return data[start..pos];
			return toLower(data[start..pos]);
		}

		// reads an attribute name starting at pos; lower-cased unless caseSensitive
		string readAttributeName() {
			// remember to include : for namespaces
			// basically just keep going until >, /, =, or whitespace
			auto start = pos;
			for(;;) {
				auto ch = data[pos];
				if(ch == '>' || ch == '/' || ch == '=' || ch.isSimpleWhite)
					break;

				if(ch == '<') {
					if(strict)
						throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos)));
					// e.g. <a href="something" <img src="poo" /></a>. The > should have been after the href, but some
					// broken files don't do that right and the browser handles it, so we will too, by pretending the > was indeed there
					break;
				}

				pos++;
				if(pos == data.length) {
					if(strict)
						throw new Exception("unterminated attribute name");
					break;
				}
			}

			if(caseSensitive)
				return data[start..pos];
			return toLower(data[start..pos]);
		}

		// reads an attribute value (quoted or, in loose mode, bare) and
		// returns it with entities decoded
		string readAttributeValue() {
			if(pos >= data.length) {
				if(strict)
					throw new Exception("no attribute value before end of file");
				return null;
			}

			auto first = data[pos];
			if(first == '\'' || first == '"') {
				auto started = pos;
				char quote = data[pos];
				pos++;
				auto start = pos;
				while(pos < data.length && data[pos] != quote)
					pos++;
				if(strict && pos == data.length)
					throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(started));
				string v = htmlEntitiesDecode(data[start..pos], strict);
				pos++; // skip over the end
				return v;
			}

			if(strict)
				parseError("Attributes must be quoted");

			// read until whitespace or terminator (/> or >)
			auto start = pos;
			for(; pos < data.length; pos++) {
				auto ch = data[pos];
				if(ch == '>' || ch.isSimpleWhite)
					break;
				// unquoted attributes might be urls, so gotta be careful with them and self-closed elements
				if(ch == '/' && pos + 1 < data.length && data[pos + 1] == '>')
					break;
			}

			string v = htmlEntitiesDecode(data[start..pos], strict);
			// don't skip the end - we'll need it later
			return v;
		}

		// everything up to (not including) the next < becomes one text node
		TextNode readTextNode() {
			auto start = pos;
			for(; pos < data.length && data[pos] != '<'; pos++) {}

			return TextNode.fromUndecodedString(this, data[start..pos]);
		}

		// this is obsolete!
		RawSource readCDataNode() {
			auto start = pos;
			for(; pos < data.length && data[pos] != '<'; pos++) {}

			return new RawSource(this, data[start..pos]);
		}


		struct Ele {
			int type; // element or closing tag or nothing
				/*
					type == 0 means regular node, self-closed (element is valid)
					type == 1 means closing tag (payload is the tag name, element may be valid)
					type == 2 means you should ignore it completely
					type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not
					type == 4 means the document was totally empty
				*/
			Element element; // for type == 0 or type == 3
			string payload; // for type == 1
		}
		// recursively read a tag
		Ele readElement(string[] parentChain = null) {
			// FIXME: this is the slowest function in this module, by far, even in strict mode.
			// Loose mode should perform decently, but strict mode is the important one.
// NOTE(review): an empty array literal is itself null in D, so the assignment below looks like a no-op - confirm before relying on it
			if(!strict && parentChain is null)
				parentChain = [];

			// NOTE: static, so this history persists between calls (and between documents)
			static string[] recentAutoClosedTags;

			if(pos >= data.length)
			{
				if(strict) {
					throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain));
				} else {
					if(parentChain.length)
						return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended
					else
						return Ele(4); // signal emptiness upstream
				}
			}

			if(data[pos] != '<') {
				return Ele(0, readTextNode(), null);
			}

			enforce(data[pos] == '<');
			pos++;
			if(pos == data.length) {
				if(strict)
					throw new MarkupException("Found trailing < at end of file");
				// if not strict, we'll just skip the switch
			} else
			switch(data[pos]) {
				// I don't care about these, so I just want to skip them
				case '!': // might be a comment, a doctype, or a special instruction
					pos++;

						// FIXME: we should store these in the tree too
						// though I like having it stripped out tbh.

					if(pos == data.length) {
						if(strict)
							throw new MarkupException("<! opened at end of file");
					} else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') {
						// comment
						pos += 2;

						// FIXME: technically, a comment is anything
						// between -- and -- inside a <!> block.
						// so in <!-- test -- lol> , the " lol" is NOT a comment
						// and should probably be handled differently in here, but for now
						// I'll just keep running until --> since that's the common way

						auto commentStart = pos;
						// <= (not <) so that a terminator sitting on the last three
						// bytes of the input is still examined and found
						while(pos+3 <= data.length && data[pos..pos+3] != "-->")
							pos++;

						auto end = commentStart;

						// there is no room left for "-->": the comment never closed
						if(pos + 3 > data.length) {
							if(strict)
								throw new MarkupException("unclosed comment");
							end = data.length;
							pos = data.length;
						} else {
							end = pos;
							assert(data[pos] == '-');
							pos++;
							assert(data[pos] == '-');
							pos++;
							assert(data[pos] == '>');
							pos++;
						}

						if(parseSawComment !is null)
							if(parseSawComment(data[commentStart .. end])) {
								return Ele(3, new HtmlComment(this, data[commentStart .. end]), null);
							}
					} else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") {
						pos += 7;

						auto cdataStart = pos;

						ptrdiff_t end = -1;
						typeof(end) cdataEnd;

						if(pos < data.length) {
							// cdata isn't allowed to nest, so this should be generally ok, as long as it is found
							end = data[pos .. $].indexOf("]]>");
						}

						if(end == -1) {
							if(strict)
								throw new MarkupException("Unclosed CDATA section");
							end = pos;
							cdataEnd = pos;
						} else {
							cdataEnd = pos + end;
							pos = cdataEnd + 3;
						}

						return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null);
					} else {
						auto start = pos;
						while(pos < data.length && data[pos] != '>')
							pos++;

						auto bangEnds = pos;
						if(pos == data.length) {
							if(strict)
								throw new MarkupException("unclosed processing instruction (<!xxx>)");
						} else pos++; // skipping the >

						if(parseSawBangInstruction !is null)
							if(parseSawBangInstruction(data[start .. bangEnds])) {
								// FIXME: these should be able to modify the parser state,
								// doing things like adding entities, somehow.

								return Ele(3, new BangInstruction(this, data[start .. bangEnds]), null);
							}
					}

					/*
					if(pos < data.length && data[pos] == '>')
						pos++; // skip the >
					else
						assert(!strict);
					*/
				break;
				case '%':
				case '?':
					/*
						Here's what we want to support:

						<% asp code %>
						<%= asp code %>
						<?php php code ?>
						<?= php code ?>

						The contents don't really matter, just if it opens with
						one of the above forms, it ends on the two char terminator.

						<?something>
							this is NOT php code
							because I've seen this in the wild: <?EM-dummyText>

							This could be php with shorttags which would be cut off
							prematurely because if(a >) - that > counts as the close
							of the tag, but since dom.d can't tell the difference
							between that and the <?EM> real world example, it will
							not try to look for the ?> ending.

						The difference between this and the asp/php stuff is that it
						ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end
						on >.
					*/

					char end = data[pos];
					auto started = pos;
					bool isAsp = end == '%';
					int currentIndex = 0;
					bool isPhp = false;
					bool isEqualTag = false;
					int phpCount = 0;

				    more:
					pos++; // skip the start
					if(pos == data.length) {
						if(strict)
							throw new MarkupException("Unclosed <"~end~" by end of file");
					} else {
						currentIndex++;
						if(currentIndex == 1 && data[pos] == '=') {
							if(!isAsp)
								isPhp = true;
							isEqualTag = true;
							goto more;
						}
						if(currentIndex == 1 && data[pos] == 'p')
							phpCount++;
						if(currentIndex == 2 && data[pos] == 'h')
							phpCount++;
						if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2)
							isPhp = true;

						if(data[pos] == '>') {
							if((isAsp || isPhp) && data[pos - 1] != end)
								goto more;
							// otherwise we're done
						} else
							goto more;
					}

					//writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]);
					auto code = data[started .. pos];


					assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length));
					if(pos < data.length)
						pos++; // get past the >

					if(isAsp && parseSawAspCode !is null) {
						if(parseSawAspCode(code)) {
							return Ele(3, new AspCode(this, code), null);
						}
					} else if(isPhp && parseSawPhpCode !is null) {
						if(parseSawPhpCode(code)) {
							return Ele(3, new PhpCode(this, code), null);
						}
					} else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) {
						if(parseSawQuestionInstruction(code)) {
							return Ele(3, new QuestionInstruction(this, code), null);
						}
					}
				break;
				case '/': // closing an element
					pos++; // skip the start
					auto p = pos;
					while(pos < data.length && data[pos] != '>')
						pos++;
					//writefln("</%s>", data[p..pos]);
					if(pos == data.length && data[pos-1] != '>') {
						if(strict)
							throw new MarkupException("File ended before closing tag had a required >");
						else
							data ~= ">"; // just hack it in
					}
					pos++; // skip the '>'

					string tname = data[p..pos-1];
					if(!caseSensitive)
						tname = tname.toLower();

				return Ele(1, null, tname); // closing tag reports itself here
				case ' ': // assume it isn't a real element...
					if(strict)
						parseError("bad markup - improperly placed <");
					else
						return Ele(0, TextNode.fromUndecodedString(this, "<"), null);
				break;
				default:

					if(!strict) {
						// what about something that kinda looks like a tag, but isn't?
						auto nextTag = data[pos .. $].indexOf("<");
						auto closeTag = data[pos .. $].indexOf(">");
						if(closeTag != -1 && nextTag != -1)
							if(nextTag < closeTag) {
								// since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically

								auto equal = data[pos .. $].indexOf("=\"");
								if(equal != -1 && equal < closeTag) {
									// this MIGHT be ok, soldier on
								} else {
									// definitely no good, this must be a (horribly distorted) text node
									pos++; // skip the < we're on - don't want text node to end prematurely
									auto node = readTextNode();
									node.contents = "<" ~ node.contents; // put this back
									return Ele(0, node, null);
								}
							}
					}

					string tagName = readTagName();
					string[string] attributes;

					// Finishes the tag whose name/attributes were just read: consumes
					// the rest of the opening tag, creates the element and, unless it
					// is self-closed, reads children until the matching closer.
					Ele addTag(bool selfClosed) {
						if(selfClosed)
							pos++;
						else {
							if(!strict)
								if(tagName.isInArray(selfClosedElements))
									// these are de-facto self closed
									selfClosed = true;
						}

						if(strict) {
							// e.g. "<br/" at the very end of the input: nothing left to index
							if(pos >= data.length)
								throw new MarkupException("tag open, didn't find > before end of file");
							if(data[pos] != '>') {
								// show up to 100 chars either side, clamped to the input so
								// building the message cannot itself go out of range
								sizediff_t contextStart = pos > 100 ? pos - 100 : 0;
								sizediff_t contextEnd = data.length;
								if(pos + 100 < contextEnd)
									contextEnd = pos + 100;
								throw new Exception(format("got %s when expecting > (possible missing attribute name)\nContext:\n%s", data[pos], data[contextStart .. contextEnd]));
							}
						} else {
							// if we got here, it's probably because a slash was in an
							// unquoted attribute - don't trust the selfClosed value
							if(!selfClosed)
								selfClosed = tagName.isInArray(selfClosedElements);

							while(pos < data.length && data[pos] != '>')
								pos++;

							if(pos >= data.length) {
								// the tag never closed
								assert(data.length != 0);
								pos = data.length - 1; // rewinding so it hits the end at the bottom..
							}
						}

						auto whereThisTagStarted = pos; // for better error messages

						pos++;

						auto e = createElement(tagName);
						e.attributes = attributes;
						version(dom_node_indexes) {
							if(e.dataset.nodeIndex.length == 0)
								e.dataset.nodeIndex = to!string(&(e.attributes));
						}
						e.selfClosed = selfClosed;
						e.parseAttributes();


						// HACK to handle script and style as a raw data section as it is in HTML browsers
						if(tagName == "script" || tagName == "style") {
							if(!selfClosed) {
								string closer = "</" ~ tagName ~ ">";
								ptrdiff_t ending;
								// only search when there is something left; the search is
								// case-insensitive in loose mode
								if(pos >= data.length)
									ending = -1;
								else
									ending = indexOf(data[pos..$], closer, 0, (loose ? CaseSensitive.no : CaseSensitive.yes));
								/*
								if(loose && ending == -1 && pos < data.length)
									ending = indexOf(data[pos..$], closer.toUpper());
								*/
								if(ending == -1) {
									if(strict)
										throw new Exception("tag " ~ tagName ~ " never closed");
									else {
										// let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit.
										if(pos < data.length) {
											e = new TextNode(this, data[pos .. $]);
											pos = data.length;
										}
									}
								} else {
									ending += pos;
									e.innerRawSource = data[pos..ending];
									pos = ending + closer.length;
								}
							}
							return Ele(0, e, null);
						}

						bool closed = selfClosed;

						void considerHtmlParagraphHack(Element n) {
							assert(!strict);
							if(e.tagName == "p" && e.tagName == n.tagName) {
								// html lets you write <p> para 1 <p> para 1
								// but in the dom tree, they should be siblings, not children.
								paragraphHackfixRequired = true;
							}
						}

						//writef("<%s>", tagName);
						while(!closed) {
							Ele n;
							if(strict)
								n = readElement();
							else
								n = readElement(parentChain ~ tagName);

							if(n.type == 4) return n; // the document is empty

							if(n.type == 3 && n.element !is null) {
								// special node, append if possible
								if(e !is null)
									e.appendChild(n.element);
								else
									piecesBeforeRoot ~= n.element;
							} else if(n.type == 0) {
								if(!strict)
									considerHtmlParagraphHack(n.element);
								e.appendChild(n.element);
							} else if(n.type == 1) {
								bool found = false;
								if(n.payload != tagName) {
									if(strict)
										parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted)));
									else {
										sawImproperNesting = true;
										// this is so we don't drop several levels of awful markup
										if(n.element) {
											if(!strict)
												considerHtmlParagraphHack(n.element);
											e.appendChild(n.element);
											n.element = null;
										}

										// is the element open somewhere up the chain?
										foreach(i, parent; parentChain)
											if(parent == n.payload) {
												recentAutoClosedTags ~= tagName;
												// just rotating it so we don't inadvertently break stuff with vile crap
												if(recentAutoClosedTags.length > 4)
													recentAutoClosedTags = recentAutoClosedTags[1 .. $];

												n.element = e;
												return n;
											}

										// if not, this is a text node; we can't fix it up...

										// If it's already in the tree somewhere, assume it is closed by algorithm
										// and we shouldn't output it - odds are the user just flipped a couple tags
										foreach(ele; e.tree) {
											if(ele.tagName == n.payload) {
												found = true;
												break;
											}
										}

										foreach(ele; recentAutoClosedTags) {
											if(ele == n.payload) {
												found = true;
												break;
											}
										}

										if(!found) // if not found in the tree though, it's probably just text
										e.appendChild(TextNode.fromUndecodedString(this, "</"~n.payload~">"));
									}
								} else {
									if(n.element) {
										if(!strict)
											considerHtmlParagraphHack(n.element);
										e.appendChild(n.element);
									}
								}

								if(n.payload == tagName) // in strict mode, this is always true
									closed = true;
							} else { /*throw new Exception("wtf " ~ tagName);*/ }
						}
						//writef("</%s>\n", tagName);
						return Ele(0, e, null);
					}

					// if a tag was opened but not closed by end of file, we can arrive here
					if(!strict && pos >= data.length)
						return addTag(false);
					//else if(strict) assert(0); // should be caught before

					switch(data[pos]) {
						default: assert(0);
						case '/': // self closing tag
							return addTag(true);
						case '>':
							return addTag(false);
						case ' ':
						case '\t':
						case '\n':
						case '\r':
							// there might be attributes...
							moreAttributes:
							eatWhitespace();

							// same deal as above the switch....
							if(!strict && pos >= data.length)
								return addTag(false);

							if(strict && pos >= data.length)
								throw new MarkupException("tag open, didn't find > before end of file");

							switch(data[pos]) {
								case '/': // self closing tag
									return addTag(true);
								case '>': // closed tag; open -- we now read the contents
									return addTag(false);
								default: // it is an attribute
									string attrName = readAttributeName();
									string attrValue = attrName;

									bool ateAny = eatWhitespace();
									if(strict && ateAny)
										throw new MarkupException("inappropriate whitespace after attribute name");

									if(pos >= data.length) {
										if(strict)
											assert(0, "this should have thrown in readAttributeName");
										else {
											data ~= ">";
											goto blankValue;
										}
									}
									if(data[pos] == '=') {
										pos++;

										ateAny = eatWhitespace();
										if(strict && ateAny)
											throw new MarkupException("inappropriate whitespace after attribute equals");

										attrValue = readAttributeValue();

										eatWhitespace();
									}

									blankValue:

									if(strict && attrName in attributes)
										throw new MarkupException("Repeated attribute: " ~ attrName);

									if(attrName.strip().length)
										attributes[attrName] = attrValue;
									else if(strict) throw new MarkupException("wtf, zero length attribute name");

									if(!strict && pos < data.length && data[pos] == '<') {
										// this is the broken tag that doesn't have a > at the end
										data = data[0 .. pos] ~ ">" ~ data[pos.. $];
										// let's insert one as a hack
										goto case '>';
									}

									goto moreAttributes;
							}
					}
			}

			return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly.
			//assert(0);
		}

		eatWhitespace();
		Ele r;
		do {
			r = readElement(); // there SHOULD only be one element...
1097 1098 if(r.type == 3 && r.element !is null) 1099 piecesBeforeRoot ~= r.element; 1100 1101 if(r.type == 4) 1102 break; // the document is completely empty... 1103 } while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node 1104 1105 root = r.element; 1106 1107 if(!strict) // in strict mode, we'll just ignore stuff after the xml 1108 while(r.type != 4) { 1109 r = readElement(); 1110 if(r.type != 4 && r.type != 2) { // if not empty and not ignored 1111 if(r.element !is null) 1112 piecesAfterRoot ~= r.element; 1113 } 1114 } 1115 1116 if(root is null) 1117 { 1118 if(strict) 1119 assert(0, "empty document should be impossible in strict mode"); 1120 else 1121 parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do 1122 } 1123 1124 if(paragraphHackfixRequired) { 1125 assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag... 1126 1127 // in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml). 1128 // It's hard to handle above though because my code sucks. So, we'll fix it here. 1129 1130 // Where to insert based on the parent (for mixed closed/unclosed <p> tags). See #120 1131 // Kind of inefficient because we can't detect when we recurse back out of a node. 1132 Element[Element] insertLocations; 1133 auto iterator = root.tree; 1134 foreach(ele; iterator) { 1135 if(ele.parentNode is null) 1136 continue; 1137 1138 if(ele.tagName == "p" && ele.parentNode.tagName == ele.tagName) { 1139 auto shouldBePreviousSibling = ele.parentNode; 1140 auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder... 
if (auto p = holder in insertLocations) {
	shouldBePreviousSibling = *p;
	assert(shouldBePreviousSibling.parentNode is holder);
}
ele = holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree());
insertLocations[holder] = ele;
iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up.
} // end: if(ele is a <p> whose parent is also a <p>)
} // end: foreach(ele; iterator)
} // end: if(paragraphHackfixRequired)
} // end of the parse function

/* end massive parse function */

/// Gets the <title> element's innerText, if one exists.
/// Returns the empty string when no <title> element is found.
@property string title() {
	auto titleElement = findFirst(delegate bool(Element candidate) {
		return candidate.tagName == "title";
	});

	return titleElement ? titleElement.innerText() : "";
}

/// Sets the title of the page, creating a <title> element if needed.
///
/// A newly created <title> is appended to the first <head> element. If the
/// document has no <head>, the new element is still given the text but is
/// not attached anywhere.
@property void title(string t) {
	auto titleElement = findFirst(delegate bool(Element candidate) {
		return candidate.tagName == "title";
	});

	if(titleElement is null) {
		titleElement = createElement("title");

		auto headElements = getElementsByTagName("head");
		if(headElements.length)
			headElements[0].appendChild(titleElement);
	}

	if(titleElement !is null)
		titleElement.innerText = t;
}

// FIXME: would it work to alias root this; ???? might be a good idea
/// These functions all forward to the root element. See the documentation in the Element class.
Element getElementById(string id) {
	// plain forward to the root element
	return root.getElementById(id);
}

/// ditto
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
out(ret) {
	assert(ret !is null);
}
body {
	// file and line are passed through so a failure reports the caller's location
	return root.requireElementById!(SomeElementType)(id, file, line);
}

/// ditto
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
out(ret) {
	assert(ret !is null);
}
body {
	// a failed dynamic cast is treated exactly like "nothing matched"
	if(auto found = cast(SomeElementType) querySelector(selector))
		return found;

	throw new ElementNotFoundException(SomeElementType.stringof, selector, this.root, file, line);
}

// Like requireSelector, but wraps the (possibly null) result in a
// MaybeNullElement instead of throwing.
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
{
	return MaybeNullElement!SomeElementType(cast(SomeElementType) querySelector(selector));
}

// Parses `selector` and rewrites every component whose first part has
// separation 0 to separation -1. Shared by querySelector and
// querySelectorAll; see the long comment in querySelectorAll for the reason.
private Selector prepareDocumentSelector(string selector) {
	auto parsed = Selector(selector);//, !loose);
	foreach(ref component; parsed.components) {
		if(component.parts.length == 0)
			continue;
		if(component.parts[0].separation == 0)
			component.parts[0].separation = -1;
	}
	return parsed;
}

/// ditto
@scriptable
Element querySelector(string selector) {
	// see comment below on Document.querySelectorAll
	auto prepared = prepareDocumentSelector(selector);

	// only the first match is wanted, so stop the lazy walk right away
	foreach(match; prepared.getMatchingElementsLazy(this.root))
		return match;

	return null;
}

/// ditto
@scriptable
Element[] querySelectorAll(string selector) {
	// In standards-compliant code, the document is slightly magical
	// in that it is a pseudoelement at top level. It should actually
	// match the root as one of its children.
	//
	// In versions of dom.d before Dec 29 2019, this worked because
	// querySelectorAll was willing to return itself. With that bug fix
	// (search "arbitrary id asduiwh" in this file for associated unittest)
	// this would have failed. Hence adding back the root if it matches the
	// selector itself.
	//
	// I'd love to do this better later.

	auto prepared = prepareDocumentSelector(selector);
	return prepared.getMatchingElements(this.root);
}

/// ditto
deprecated("use querySelectorAll instead")
Element[] getElementsBySelector(string selector) {
	return root.getElementsBySelector(selector);
}

/// ditto
@scriptable
Element[] getElementsByTagName(string tag) {
	return root.getElementsByTagName(tag);
}

/// ditto
@scriptable
Element[] getElementsByClassName(string tag) {
	return root.getElementsByClassName(tag);
}

/** FIXME: btw, this could just be a lazy range...... */
// Returns the first element (searching from the root via findFirst) whose
// tagName equals `tag`. In loose mode the requested name is lowercased first.
Element getFirstElementByTagName(string tag) {
	string wanted = loose ? tag.toLower() : tag;

	return findFirst(delegate bool(Element candidate) {
		return candidate.tagName == wanted;
	});
}

/// This returns the <body> element, if there is one. (It is named differently than in Javascript, where it is called 'body', because body is a keyword in D.)
Element mainBody() {
	return getFirstElementByTagName("body");
}

/// Looks up a `<meta>` tag inside `<head>` and returns its `content`.
/// The attribute matched is `[name=]` if `name` contains no colon and
/// `[property=]` if it does. Returns null when no such tag is found.
string getMeta(string name) {
	string attributeName = (name.indexOf(":") == -1) ? "name" : "property";

	auto metaElement = querySelector("head meta["~attributeName~"="~name~"]");
	if(metaElement is null)
		return null;

	return metaElement.content;
}

/// Sets a meta tag in the document header.
/// It is kinda hacky to work easily for both Facebook open graph and traditional html meta tags.
void setMeta(string name, string value) {
	// same rule as getMeta: a colon in the name selects [property=], otherwise [name=]
	string attributeName = (name.indexOf(":") == -1) ? "name" : "property";

	auto metaElement = querySelector("head meta["~attributeName~"="~name~"]");
	if(metaElement is null) {
		// no such tag yet: add one under <head> (requireSelector throws if there is no head)
		metaElement = requireSelector("head").addChild("meta");
		metaElement.setAttribute(attributeName, name);
	}

	metaElement.content = value;
}

/// Returns the result of `getElementsByTagName("form")`, cast to `Form[]`.
Form[] forms() {
	auto formElements = getElementsByTagName("form");
	return cast(Form[]) formElements;
}

/// Creates a new `form` element through [createElement] and returns it as a Form.
Form createForm()
out(ret) {
	assert(ret !is null);
}
body {
	auto created = createElement("form");
	return cast(Form) created;
}

/// Creates an element via `Element.make` and sets its parentDocument to this document.
/// In loose mode the tag name is lowercased first.
Element createElement(string name) {
	string effectiveName = loose ? name.toLower() : name;

	auto created = Element.make(effectiveName);
	created.parentDocument = this;
	return created;

	// return new Element(this, name, null, selfClosed);
}

/// Creates a new, empty DocumentFragment belonging to this document.
Element createFragment() {
	return new DocumentFragment(this);
}

/// Creates a new TextNode with the given content, belonging to this document.
Element createTextNode(string content) {
	return new TextNode(this, content);
}


/// Walks the tree depth-first, starting at the root and visiting each
/// element before its children, and returns the first element for which
/// `doesItMatch` returns true. Returns null if nothing matches.
Element findFirst(bool delegate(Element) doesItMatch) {
	Element firstMatch;

	// returns true as soon as a match has been stored in firstMatch
	bool visit(Element current) {
		if(doesItMatch(current)) {
			firstMatch = current;
			return true;
		}

		foreach(kid; current.children)
			if(visit(kid))
				return true;

		return false;
	}

	visit(root);

	return firstMatch;
}

/// Drops the root element and turns loose mode off.
void clear() {
	loose = false;
	root = null;
}

/// Replaces the prolog text and records that it was set explicitly,
/// so [prolog] returns it even if pieces before the root were kept.
void setProlog(string d) {
	prologWasSet = true;
	_prolog = d;
}

///.
private string _prolog = "<!DOCTYPE html>\n";
private bool prologWasSet = false; // set to true if the user changed it

// The text emitted before the root element by toString / toPrettyString.
@property string prolog() const {
	// if the user explicitly changed it, do what they want
	// or if we didn't keep/find stuff from the document itself,
	// we'll use the builtin one as a default.
	immutable useStoredProlog = prologWasSet || piecesBeforeRoot.length == 0;
	if(useStoredProlog)
		return _prolog;

	// otherwise rebuild it from what was kept, one piece per line
	string assembled;
	foreach(piece; piecesBeforeRoot) {
		assembled ~= piece.toString();
		assembled ~= "\n";
	}
	return assembled;
}

/// Serializes the document: the [prolog] followed by the root element's `toString()`.
override string toString() const {
	return prolog ~ root.toString();
}

/++
	Writes it out with whitespace for easier eyeball debugging

	Do NOT use for anything other than eyeball debugging,
	because whitespace may be significant content in XML.

	Unlike [toString], this also writes out `piecesAfterRoot`.
+/
string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
	string output = prolog;

	// the line break after the prolog is wrapped in a comment when asked to
	output ~= insertComments ? "<!--\n-->" : "\n";

	output ~= root.toPrettyString(insertComments, indentationLevel, indentWith);
	foreach(trailing; piecesAfterRoot)
		output ~= trailing.toPrettyString(insertComments, indentationLevel, indentWith);

	return output;
}

/// The root element of the document. The lookup methods on Document forward to it.
Element root;

/// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s
Element[] piecesBeforeRoot;

/// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it
Element[] piecesAfterRoot;

/// When true, tag names given to createElement and getFirstElementByTagName are lowercased.
bool loose;



// what follows are for mutation events that you can observe
void delegate(DomMutationEvent)[] eventObservers;

// Calls every registered observer, in registration order, with the event.
void dispatchMutationEvent(DomMutationEvent e) {
	foreach(observer; eventObservers)
		observer(e);
}
}

/// This represents almost everything in the DOM.
/// Group: core_functionality
class Element {
/// Returns a collection of elements by selector.
/// See: [Document.opIndex]
ElementCollection opIndex(string selector) {
	auto collection = ElementCollection(this);
	return collection[selector];
}

/++
	Returns the child node with the particular index, or null if
	the index is out of range.

	Be aware that child nodes include text nodes, including
	whitespace-only nodes.
+/
Element opIndex(size_t index) {
	return (index < children.length) ? this.children[index] : null;
}

/// Calls getElementById, but throws instead of returning null if the element is not found. You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done.
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
	if(
		is(SomeElementType : Element)
	)
out(ret) {
	assert(ret !is null);
}
body {
	// a failed dynamic cast is reported the same way as a missing element
	if(auto found = cast(SomeElementType) getElementById(id))
		return found;

	throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, this, file, line);
}

/// ditto but with selectors instead of ids
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(
		is(SomeElementType : Element)
	)
out(ret) {
	assert(ret !is null);
}
body {
	if(auto found = cast(SomeElementType) querySelector(selector))
		return found;

	throw new ElementNotFoundException(SomeElementType.stringof, selector, this, file, line);
}


/++
	If a matching selector is found, it returns that Element. Otherwise, the returned object returns null for all methods.
+/
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
{
	return MaybeNullElement!SomeElementType(cast(SomeElementType) querySelector(selector));
}



/// get all the classes on this element (className split on single spaces)
@property string[] classes() {
	return split(className, " ");
}

/// Adds a string to the class attribute. The class attribute is used a lot in CSS.
/// Does nothing if the class is already present. Returns this element.
@scriptable
Element addClass(string c) {
	if(hasClass(c))
		return this; // don't add it twice

	string existing = getAttribute("class");
	// an empty attribute takes the name as-is; otherwise append after one space
	setAttribute("class", existing.length == 0 ? c : existing ~ " " ~ c);

	return this;
}

/// Removes a particular class name. Returns this element.
@scriptable
Element removeClass(string c) {
	if(!hasClass(c))
		return this;

	// rebuild the list from everything that is not the removed name
	string remaining;
	foreach(existing; classes) {
		if(existing != c) {
			if(remaining.length)
				remaining ~= " ";
			remaining ~= existing;
		}
	}

	className = remaining.strip();

	return this;
}

/// Returns whether the given class appears in this element.
bool hasClass(string c) {
	string current = className;

	// cheap substring test first; if it isn't in there at all we are done
	if(current.indexOf(c) == -1)
		return false;

	// a substring hit might be part of a longer name, so compare whole names
	foreach(candidate; current.split(" ")) {
		if(candidate == c)
			return true;
	}
	return false;

	/*
	int rightSide = idx + c.length;

	bool checkRight() {
		if(rightSide == cn.length)
			return true; // it's the only class
		else if(iswhite(cn[rightSide]))
			return true;
		return false; // this is a substring of something else..
	}

	if(idx == 0) {
		return checkRight();
	} else {
		if(!iswhite(cn[idx - 1]))
			return false; // substring
		return checkRight();
	}

	assert(0);
	*/
}


/* *******************************
	  DOM Mutation
*********************************/
/// convenience function to quickly add a tag with some text or
/// other relevant info (for example, it's a src for an <img> element
/// instead of inner text)
Element addChild(string tagName, string childInfo = null, string childInfo2 = null)
in {
	assert(tagName !is null);
}
out(e) {
	//assert(e.parentNode is this);
	//assert(e.parentDocument is this.parentDocument);
}
body {
	// FIXME (maybe): if the thing is self closed, we might want to go ahead and
	// return the parent. That will break existing code though.
	return appendChild(Element.make(tagName, childInfo, childInfo2));
}

/// Another convenience function. Adds a child directly after the current one, returning
/// the new child.
///
/// Between this, addChild, and parentNode, you can build a tree as a single expression.
Element addSibling(string tagName, string childInfo = null, string childInfo2 = null)
in {
	assert(tagName !is null);
	assert(parentNode !is null);
}
out(e) {
	assert(e.parentNode is this.parentNode);
	assert(e.parentDocument is this.parentDocument);
}
body {
	auto sibling = Element.make(tagName, childInfo, childInfo2);
	return parentNode.insertAfter(this, sibling);
}

/// Inserts the given element into the parent directly after this one.
Element addSibling(Element e) {
	return parentNode.insertAfter(this, e);
}

/// Same as `appendChild(e)`.
Element addChild(Element e) {
	return this.appendChild(e);
}

/// Convenience function to append text intermixed with other children.
/// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), ".");
/// or div.addChildren("Hello, ", user.name, "!");

/// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping.
///
/// Each argument that converts to Element is appended with appendChild.
/// Each string argument is appended with appendText. Any other argument
/// type is rejected at compile time.
void addChildren(T...)(T t) {
	import std.traits : isSomeString;

	foreach(item; t) {
		// The checks have to look at the *type* of each argument. Testing the
		// value itself with is(...) is always false, which sent every call
		// to the static assert below.
		static if(is(typeof(item) : Element))
			appendChild(item);
		else static if(isSomeString!(typeof(item)))
			appendText(to!string(item));
		else
			static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren");
	}
}

/// Creates a new `tagName` element, puts `firstChild` inside it, appends the
/// new element to this one and returns the new element. `info2` is passed to
/// `Element.make` as its third argument.
Element addChild(string tagName, Element firstChild, string info2 = null)
in {
	assert(firstChild !is null);
}
out(ret) {
	assert(ret !is null);
	assert(ret.parentNode is this);
	assert(firstChild.parentNode is ret);

	assert(ret.parentDocument is this.parentDocument);
	//assert(firstChild.parentDocument is this.parentDocument);
}
body {
	auto e = Element.make(tagName, "", info2);
	e.appendChild(firstChild);
	this.appendChild(e);
	return e;
}

/// Creates a new `tagName` element, appends it to this one, then sets its
/// innerHTML from `innerHtml.source`. Returns the new element.
Element addChild(string tagName, in Html innerHtml, string info2 = null)
in {
}
out(ret) {
	assert(ret !is null);
	assert((cast(DocumentFragment) this !is null) || (ret.parentNode is this), ret.toString);// e.parentNode ? e.parentNode.toString : "null");
	assert(ret.parentDocument is this.parentDocument);
}
body {
	auto e = Element.make(tagName, "", info2);
	// appended before the html is set, unlike the Element overload above
	this.appendChild(e);
	e.innerHTML = innerHtml.source;
	return e;
}


/// Appends each element of the array, in order, with appendChild.
void appendChildren(Element[] children) {
	foreach(ele; children)
		appendChild(ele);
}

///.
void reparent(Element newParent)
in {
	assert(newParent !is null);
	assert(parentNode !is null);
}
out {
	assert(this.parentNode is newParent);
	//assert(isInArray(this, newParent.children));
}
body {
	// detach from the current parent first, then attach to the new one
	auto oldParent = parentNode;
	oldParent.removeChild(this);
	newParent.appendChild(this);
}

/**
	Strips this tag out of the document, putting its inner html
	as children of the parent.

	For example, given: `<p>hello <b>there</b></p>`, if you
	call `stripOut` on the `b` element, you'll be left with
	`<p>hello there</p>`.

	The idea here is to make it easy to get rid of garbage
	markup you aren't interested in.
*/
void stripOut()
in {
	assert(parentNode !is null);
}
out {
	assert(parentNode is null);
	assert(children.length == 0);
}
body {
	// clear each child's parent link before handing them to our parent
	foreach(kid; children)
		kid.parentNode = null; // remove the parent

	if(children.length == 0)
		parentNode.removeChild(this);
	else
		parentNode.replaceChild(this, this.children);

	this.children.length = 0; // we reparented them all above
}

/// shorthand for `this.parentNode.removeChild(this)` with `parentNode` `null` check
/// if the element already isn't in a tree, it does nothing.
Element removeFromTree()
in {

}
out(var) {
	assert(this.parentNode is null);
	assert(var is this);
}
body {
	if(this.parentNode !is null)
		this.parentNode.removeChild(this);

	return this;
}

/++
	Wraps this element inside the given element.
	It's like `this.replaceWith(what); what.appendChild(this);`

	Given: `<b>cool</b>`, if you call `b.wrapIn(new Link("site.com", "my site is "));`
	you'll end up with: `<a href="site.com">my site is <b>cool</b></a>`.
+/
Element wrapIn(Element what)
in {
	assert(what !is null);
}
out(ret) {
	assert(this.parentNode is what);
	assert(ret is what);
}
body {
	// the wrapper takes our place in the tree, then we move inside it
	this.replaceWith(what);
	what.appendChild(this);

	return what;
}

/// Replaces this element with something else in the tree.
/// `e` is removed from wherever it currently is first. Returns `e`.
Element replaceWith(Element e)
in {
	assert(this.parentNode !is null);
}
body {
	e.removeFromTree();
	auto holder = this.parentNode;
	holder.replaceChild(this, e);
	return e;
}

/**
	Splits the className into an array of each class given
*/
string[] classNames() const {
	return className().split(" ");
}

/**
	Fetches the first consecutive text nodes concatenated together.


	`firstInnerText` of `<example>some text<span>more text</span></example>` is `some text`. It stops at the first child tag encountered.

	See_also: [directText], [innerText]
*/
string firstInnerText() const {
	string collected;
	foreach(node; children) {
		// the first non-text child ends the run
		if(node.nodeType != NodeType.Text)
			break;

		collected ~= node.nodeValue();
	}
	return collected;
}


/**
	Returns the text directly under this element.


	Unlike [innerText], it does not recurse, and unlike [firstInnerText], it continues
	past child tags. So, `<example>some <b>bold</b> text</example>`
	will return `some  text` because it only gets the text, skipping non-text children.

	See_also: [firstInnerText], [innerText]
*/
@property string directText() {
	string collected;
	foreach(node; children) {
		if(node.nodeType != NodeType.Text)
			continue; // skip non-text children but keep going
		collected ~= node.nodeValue();
	}

	return collected;
}

/**
	Sets the direct text, without modifying other child nodes.


	Unlike [innerText], this does *not* remove existing elements in the element.

	It only replaces the first text node it sees.

	If there are no text nodes, it calls [appendText].

	So, given `<div><img />text here</div>`, it will keep the `<img />`, and replace the `text here`.
*/
@property void directText(string text) {
	foreach(node; children) {
		if(node.nodeType != NodeType.Text)
			continue;

		// first text child found: overwrite it and stop
		auto textNode = cast(TextNode) node;
		textNode.contents = text;
		return;
	}

	appendText(text);
}

// do nothing, this is primarily a virtual hook
// for links and forms
void setValue(string field, string value) { }


// this is a thing so i can remove observer support if it gets slow
// I have not implemented all these yet
//
// Builds a DomMutationEvent targeting this element and hands it to the
// parent document's dispatchMutationEvent. Does nothing when this element
// has no parent document.
private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) {
	if(parentDocument is null)
		return;

	DomMutationEvent event;
	event.operation = operation;
	event.target = this;
	event.relatedString = s1;
	event.relatedString2 = s2;
	event.related = r;
	event.related2 = r2;

	parentDocument.dispatchMutationEvent(event);
}

// putting all the members up front

// this ought to be private. don't use it directly.
Element[] children;

/// The name of the tag. Remember, changing this doesn't change the dynamic type of the object.
string tagName;

/// This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead.
string[string] attributes;

/// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here.
/// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list.
private bool selfClosed;

/// Get the parent Document object that contains this element.
/// It may be null, so remember to check for that.
Document parentDocument;

///.
1902 inout(Element) parentNode() inout { 1903 auto p = _parentNode; 1904 1905 if(cast(DocumentFragment) p) 1906 return p._parentNode; 1907 1908 return p; 1909 } 1910 1911 //protected 1912 Element parentNode(Element e) { 1913 return _parentNode = e; 1914 } 1915 1916 private Element _parentNode; 1917 1918 // the next few methods are for implementing interactive kind of things 1919 private CssStyle _computedStyle; 1920 1921 // these are here for event handlers. Don't forget that this library never fires events. 1922 // (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.) 1923 EventHandler[][string] bubblingEventHandlers; 1924 EventHandler[][string] capturingEventHandlers; 1925 EventHandler[string] defaultEventHandlers; 1926 1927 void addEventListener(string event, EventHandler handler, bool useCapture = false) { 1928 if(event.length > 2 && event[0..2] == "on") 1929 event = event[2 .. $]; 1930 1931 if(useCapture) 1932 capturingEventHandlers[event] ~= handler; 1933 else 1934 bubblingEventHandlers[event] ~= handler; 1935 } 1936 1937 1938 // and now methods 1939 1940 /// Convenience function to try to do the right thing for HTML. This is the main 1941 /// way I create elements. 1942 static Element make(string tagName, string childInfo = null, string childInfo2 = null) { 1943 bool selfClosed = tagName.isInArray(selfClosedElements); 1944 1945 Element e; 1946 // want to create the right kind of object for the given tag... 
1947 switch(tagName) { 1948 case "#text": 1949 e = new TextNode(null, childInfo); 1950 return e; 1951 // break; 1952 case "table": 1953 e = new Table(null); 1954 break; 1955 case "a": 1956 e = new Link(null); 1957 break; 1958 case "form": 1959 e = new Form(null); 1960 break; 1961 case "tr": 1962 e = new TableRow(null); 1963 break; 1964 case "td", "th": 1965 e = new TableCell(null, tagName); 1966 break; 1967 default: 1968 e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere 1969 } 1970 1971 // make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too 1972 e.tagName = tagName; 1973 e.selfClosed = selfClosed; 1974 1975 if(childInfo !is null) 1976 switch(tagName) { 1977 /* html5 convenience tags */ 1978 case "audio": 1979 if(childInfo.length) 1980 e.addChild("source", childInfo); 1981 if(childInfo2 !is null) 1982 e.appendText(childInfo2); 1983 break; 1984 case "source": 1985 e.src = childInfo; 1986 if(childInfo2 !is null) 1987 e.type = childInfo2; 1988 break; 1989 /* regular html 4 stuff */ 1990 case "img": 1991 e.src = childInfo; 1992 if(childInfo2 !is null) 1993 e.alt = childInfo2; 1994 break; 1995 case "link": 1996 e.href = childInfo; 1997 if(childInfo2 !is null) 1998 e.rel = childInfo2; 1999 break; 2000 case "option": 2001 e.innerText = childInfo; 2002 if(childInfo2 !is null) 2003 e.value = childInfo2; 2004 break; 2005 case "input": 2006 e.type = "hidden"; 2007 e.name = childInfo; 2008 if(childInfo2 !is null) 2009 e.value = childInfo2; 2010 break; 2011 case "button": 2012 e.innerText = childInfo; 2013 if(childInfo2 !is null) 2014 e.type = childInfo2; 2015 break; 2016 case "a": 2017 e.innerText = childInfo; 2018 if(childInfo2 !is null) 2019 e.href = childInfo2; 2020 break; 2021 case "script": 2022 case "style": 2023 e.innerRawSource = childInfo; 2024 break; 2025 case "meta": 2026 e.name = childInfo; 2027 if(childInfo2 !is null) 2028 e.content = childInfo2; 2029 break; 2030 /* 
generically, assume we were passed text and perhaps class */ 2031 default: 2032 e.innerText = childInfo; 2033 if(childInfo2.length) 2034 e.className = childInfo2; 2035 } 2036 2037 return e; 2038 } 2039 2040 static Element make(string tagName, in Html innerHtml, string childInfo2 = null) { 2041 // FIXME: childInfo2 is ignored when info1 is null 2042 auto m = Element.make(tagName, "not null"[0..0], childInfo2); 2043 m.innerHTML = innerHtml.source; 2044 return m; 2045 } 2046 2047 static Element make(string tagName, Element child, string childInfo2 = null) { 2048 auto m = Element.make(tagName, cast(string) null, childInfo2); 2049 m.appendChild(child); 2050 return m; 2051 } 2052 2053 2054 /// Generally, you don't want to call this yourself - use Element.make or document.createElement instead. 2055 this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) { 2056 parentDocument = _parentDocument; 2057 tagName = _tagName; 2058 if(_attributes !is null) 2059 attributes = _attributes; 2060 selfClosed = _selfClosed; 2061 2062 version(dom_node_indexes) 2063 this.dataset.nodeIndex = to!string(&(this.attributes)); 2064 2065 assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid"); 2066 } 2067 2068 /// Convenience constructor when you don't care about the parentDocument. Note this might break things on the document. 2069 /// Note also that without a parent document, elements are always in strict, case-sensitive mode. 2070 this(string _tagName, string[string] _attributes = null) { 2071 tagName = _tagName; 2072 if(_attributes !is null) 2073 attributes = _attributes; 2074 selfClosed = tagName.isInArray(selfClosedElements); 2075 2076 // this is meant to reserve some memory. It makes a small, but consistent improvement. 
//children.length = 8;
	//children.length = 0;

	version(dom_node_indexes)
		this.dataset.nodeIndex = to!string(&(this.attributes));
}

/// Internal constructor: records only the owning document. Tag name,
/// attributes and the self-closed flag are left at their defaults.
private this(Document _parentDocument) {
	parentDocument = _parentDocument;

	version(dom_node_indexes)
		this.dataset.nodeIndex = to!string(&(this.attributes));
}


/* *******************************
       Navigating the DOM
*********************************/

/// Returns the first child of this element. If it has no children, returns null.
/// Remember, text nodes are children too.
@property Element firstChild() {
	return children.length ? children[0] : null;
}

/// Returns the last child of this element, or null if it has no children.
/// Text nodes count as children here as well.
@property Element lastChild() {
	return children.length ? children[$ - 1] : null;
}

/// UNTESTED
/// The next element you would encounter if you were reading it in the source:
/// the first child if there is one, otherwise the next sibling, otherwise the
/// next sibling of the nearest ancestor that has one. Returns null when this
/// is the last node of its tree.
Element nextInSource() {
	auto n = firstChild;
	if(n is null)
		n = nextSibling();
	if(n is null) {
		auto p = this.parentNode;
		while(p !is null && n is null) {
			n = p.nextSibling;
			// keep climbing; without this the loop never terminated when
			// the parent itself had no following sibling
			p = p.parentNode;
		}
	}

	return n;
}

/// UNTESTED
/// ditto
///
/// Returns the previous sibling if there is one; otherwise the parent's last
/// child, falling back to the parent itself (null if there is no parent).
// NOTE(review): the "parent's last child" step does not look like a true
// reverse of nextInSource - confirm the intended behavior before relying on it.
Element previousInSource() {
	auto p = previousSibling;
	if(p is null) {
		auto par = parentNode;
		if(par)
			p = par.lastChild;
		if(p is null)
			p = par;
	}
	return p;
}

/++
	Returns the closest sibling that precedes this element in its parent.

	Params:
		tagName = `null` accepts any node (including text), `"*"` accepts any
		non-text node, anything else must equal the sibling's `tagName`.

	Returns: the nearest matching preceding sibling, or null if there is no
	parent or no match.
+/
@property Element previousSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;
	Element ps = null;
	foreach(e; this.parentNode.childNodes) {
		if(e is this)
			break;
		if(tagName == "*" && e.nodeType != NodeType.Text) {
			// remember it and keep scanning: we want the LAST match before
			// `this`, not the parent's first non-text child
			ps = e;
			continue;
		}
		if(tagName is null || e.tagName == tagName)
			ps = e;
	}

	return ps;
}

///.
@property Element nextSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;
	Element ns = null;
	bool mightBe = false; // becomes true once we have walked past `this`
	foreach(e; this.parentNode.childNodes) {
		if(e is this) {
			mightBe = true;
			continue;
		}
		if(mightBe) {
			// "*" means: any node that is not a text node
			if(tagName == "*" && e.nodeType != NodeType.Text) {
				ns = e;
				break;
			}
			// null means: any node at all; otherwise the tag must match
			if(tagName is null || e.tagName == tagName) {
				ns = e;
				break;
			}
		}
	}

	return ns;
}


/++
	Gets the nearest node, going up the chain, with the given tagName.

	If `tagName` is null it is inferred from `T` for [Form] ("form"),
	[Table] ("table") and [Link] ("a"); for any other `T` the immediate
	parent is examined.

	Returns: the matching ancestor. When `T` is plain `Element`, returns
	null if nothing matched.

	Throws: `ElementNotFoundException` when `T` is a subclass and the node
	found (or null) cannot be cast to `T`.
+/
T getParent(T = Element)(string tagName = null) if(is(T : Element)) {
	if(tagName is null) {
		static if(is(T == Form))
			tagName = "form";
		else static if(is(T == Table))
			tagName = "table";
		else static if(is(T == Link))
			tagName = "a"; // was `tagName == "a";`, a comparison with no effect
	}

	auto par = this.parentNode;
	while(par !is null) {
		if(tagName is null || par.tagName == tagName)
			break;
		par = par.parentNode;
	}

	static if(!is(T == Element)) {
		auto t = cast(T) par;
		if(t is null)
			throw new ElementNotFoundException("", tagName ~ " parent not found", this);
	} else
		auto t = par;

	return t;
}

/// Searches the nodes yielded by [tree] for the first one whose `id`
/// attribute equals `id`. Returns null if none is found.
Element getElementById(string id) {
	// FIXME: I use this function a lot, and it's kinda slow
	// not terribly slow, but not great.
	foreach(e; tree)
		if(e.id == id)
			return e;
	return null;
}

/// Note: you can give multiple selectors, separated by commas.
/// It will return the first match it finds.
@scriptable
Element querySelector(string selector) {
	// FIXME: inefficient; it gets all results just to discard most of them
	auto list = getElementsBySelector(selector);
	if(list.length == 0)
		return null;
	return list[0];
}

/// a more standards-compliant alias for getElementsBySelector
@scriptable
Element[] querySelectorAll(string selector) {
	return getElementsBySelector(selector);
}

/// If the element matches the given selector. Previously known as `matchesSelector`.
@scriptable
bool matches(string selector) {
	/+
	bool caseSensitiveTags = true;
	if(parentDocument && parentDocument.loose)
		caseSensitiveTags = false;
	+/

	Selector s = Selector(selector);
	return s.matchesElement(this);
}

/// Returns itself or the closest parent that matches the given selector, or null if none found
/// See_also: https://developer.mozilla.org/en-US/docs/Web/API/Element/closest
@scriptable
Element closest(string selector) {
	Element e = this;
	while(e !is null) {
		if(e.matches(selector))
			return e;
		e = e.parentNode;
	}
	return null;
}

/**
	Returns elements that match the given CSS selector

	* -- all, default if nothing else is there

	tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector

	It is all additive

	OP

	space = descendant
	>     = direct descendant
	+     = sibling (E+F Matches any F element immediately preceded by a sibling element E)

	[foo]        Foo is present as an attribute
	[foo="warning"]   Matches any E element whose "foo" attribute value is exactly equal to "warning".
	E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning"
	E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en".

	[item$=sdas] ends with
	[item^=sdsad] begins with

	Quotes are optional here.

	Pseudos:
		:first-child
		:last-child
		:link (same as a[href] for our purposes here)


	There can be commas separating the selector. A comma separated list result is OR'd onto the main.



	This ONLY cares about elements. text, etc, are ignored


	There should be two functions: given element, does it match the selector? and given a selector, give me all the elements
*/
Element[] getElementsBySelector(string selector) {
	// FIXME: this function could probably use some performance attention
	// ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app.


	// tags are matched case-insensitively only when the owning document is in loose mode
	bool caseSensitiveTags = true;
	if(parentDocument && parentDocument.loose)
		caseSensitiveTags = false;

	// each comma-separated selector contributes its matches, concatenated in order
	Element[] ret;
	foreach(sel; parseSelectorString(selector, caseSensitiveTags))
		ret ~= sel.getElements(this);
	return ret;
}

/// Returns the elements matching the selector `"." ~ cn`.
Element[] getElementsByClassName(string cn) {
	// is this correct?
	return getElementsBySelector("." ~ cn);
}

/// Returns every node yielded by [tree] whose tag name equals `tag`.
/// In a loose-mode document `tag` is lowercased before comparing.
Element[] getElementsByTagName(string tag) {
	if(parentDocument && parentDocument.loose)
		tag = tag.toLower();
	Element[] ret;
	foreach(e; tree)
		if(e.tagName == tag)
			ret ~= e;
	return ret;
}


/* *******************************
          Attributes
*********************************/

/**
	Gets the given attribute value, or null if the
	attribute is not set.

	Note that the returned string is decoded, so it no longer contains any xml entities.
*/
@scriptable
string getAttribute(string name) const {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();
	auto e = name in attributes;
	if(e)
		return *e;
	else
		return null;
}

/**
	Sets an attribute. Returns this for easy chaining

	For `href` and `src` a leading `vbscript:` or `javascript:` scheme
	(matched case-insensitively, after surrounding whitespace is ignored)
	is removed from the value before it is stored.
*/
@scriptable
Element setAttribute(string name, string value) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	// I never use this shit legitimately and neither should you
	auto it = name.toLower();
	if(it == "href" || it == "src") {
		// Detect AND cut on the same stripped string. Previously the check
		// used the stripped value but the slice used the raw one, so enough
		// leading whitespace left the scheme intact in the stored value.
		auto stripped = value.strip();
		auto v = stripped.toLower();
		if(v.startsWith("vbscript:"))
			value = stripped[9..$];
		else if(v.startsWith("javascript:"))
			value = stripped[11..$];
	}

	attributes[name] = value;

	sendObserverEvent(DomMutationOperations.setAttribute, name, value);

	return this;
}

/**
	Returns if the attribute exists.
*/
@scriptable
bool hasAttribute(string name) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	if(name in attributes)
		return true;
	else
		return false;
}

/**
	Removes the given attribute from the element.

	The observer event is sent whether or not the attribute was present.
	Returns this.
*/
@scriptable
Element removeAttribute(string name)
	out(ret) {
		assert(ret is this);
	}
body {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();
	if(name in attributes)
		attributes.remove(name);

	sendObserverEvent(DomMutationOperations.removeAttribute, name);
	return this;
}

/**
	Gets the class attribute's contents. Returns
	an empty string if it has no class.
*/
@property string className() const {
	auto c = getAttribute("class");
	if(c is null)
		return "";
	return c;
}

///.
@property Element className(string c) {
	setAttribute("class", c);
	return this;
}

/**
	Object-style access to the common HTML attributes accepted by
	`isConvenientAttribute`.

	---
	auto a = Element.make("a");
	a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html");
	string where = a.href; // same as a.getAttribute("href");
	---

	Passing a non-null value sets the attribute first; the current value is
	always returned.
*/
@property string opDispatch(string name)(string v = null) if(isConvenientAttribute(name)) {
	const bool isSetter = v !is null;
	if(isSetter)
		setAttribute(name, v);
	return getAttribute(name);
}

/**
	Old access to attributes. Use [attrs] instead.

	DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions.
	so I want to remove it. A small whitelist of attributes is still allowed, but others are not.

	Instead, use element.attrs.attribute, element.attrs["attribute"],
	or element.getAttribute("attribute")/element.setAttribute("attribute").
*/
@property string opDispatch(string name)(string v = null) if(!isConvenientAttribute(name)) {
	static assert(0, "Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName");
}

/*
// this would be nice for convenience, but it broke the getter above.
@property void opDispatch(string name)(bool boolean) if(name != "popFront") {
	if(boolean)
		setAttribute(name, name);
	else
		removeAttribute(name);
}
*/

/**
	Read-only view of the element's children.
*/
@property const(Element[]) childNodes() const {
	return children;
}

/// Mutable version of the same
@property Element[] childNodes() { // FIXME: the above should be inout
	return children;
}

/++
	HTML5's dataset property. It is an alternate view into attributes with the data- prefix.
	Given `<a data-my-property="cool" />`, we get `assert(a.dataset.myProperty == "cool");`
+/
@property DataSet dataset() {
	return DataSet(this);
}

/++
	Gives dot/opIndex access to attributes
	---
	ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo")
	---
+/
@property AttributeSet attrs() {
	return AttributeSet(this);
}

/++
	Provides both string and object style (like in Javascript) access to the style attribute.

	---
	element.style.color = "red"; // translates into setting `color: red;` in the `style` attribute
	---
+/
@property ElementStyle style() {
	return ElementStyle(this);
}

/++
	This sets the style attribute with a string, then returns the style view.
+/
@property ElementStyle style(string s) {
	this.setAttribute("style", s);
	return this.style;
}

/// Currently a no-op; the per-attribute dispatch below was never enabled.
private void parseAttributes(string[] whichOnes = null) {
/+
	if(whichOnes is null)
		whichOnes = attributes.keys;
	foreach(attr; whichOnes) {
		switch(attr) {
			case "id":

			break;
			case "class":

			break;
			case "style":

			break;
			default:
				// we don't care about it
		}
	}
+/
}


// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.
/// Don't use this.
@property CssStyle computedStyle() {
	// built once, then cached in _computedStyle
	if(_computedStyle !is null)
		return _computedStyle;

	auto css = this.getAttribute("style");

	/* we'll treat shitty old html attributes as css here */
	if(this.hasAttribute("width"))
		css ~= "; width: " ~ this.attrs.width;
	if(this.hasAttribute("height"))
		css ~= "; height: " ~ this.attrs.height;
	if(this.hasAttribute("bgcolor"))
		css ~= "; background-color: " ~ this.attrs.bgcolor;
	if(this.tagName == "body" && this.hasAttribute("text"))
		css ~= "; color: " ~ this.attrs.text;
	if(this.hasAttribute("color"))
		css ~= "; color: " ~ this.attrs.color;
	/* done */

	_computedStyle = new CssStyle(null, css); // gives at least something to work with
	return _computedStyle;
}

/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
version(browser) {
	void* expansionHook; ///ditto
	int offsetWidth; ///ditto
	int offsetHeight; ///ditto
	int offsetLeft; ///ditto
	int offsetTop; ///ditto
	Element offsetParent; ///ditto
	bool hasLayout; ///ditto
	int zIndex; ///ditto

	///ditto
	/// Sum of offsetLeft over this element and its whole offsetParent chain.
	int absoluteLeft() {
		int total = offsetLeft;
		for(auto anc = offsetParent; anc; anc = anc.offsetParent)
			total += anc.offsetLeft;
		return total;
	}

	///ditto
	/// Sum of offsetTop over this element and its whole offsetParent chain.
	int absoluteTop() {
		int total = offsetTop;
		for(auto anc = offsetParent; anc; anc = anc.offsetParent)
			total += anc.offsetTop;
		return total;
	}
}

// Back to the regular dom functions

public:


/* *******************************
          DOM Mutation
*********************************/

/// Removes all inner content from the tag; all child text and elements are gone.
void removeAllChildren()
	out {
		assert(this.children.length == 0);
	}
body {
	children = null;
}


/++
	Appends the given element to this one. If it already has a parent, it is removed from that tree and moved to this one.

	See_also: https://developer.mozilla.org/en-US/docs/Web/API/Node/appendChild

	History:
		Prior to 1 Jan 2020 (git tag v4.4.1 and below), it required that the given element must not have a parent already. This was in violation of standard, so it changed the behavior to remove it from the existing parent and instead move it here.
+/
Element appendChild(Element e)
	in {
		assert(e !is null);
	}
	out (ret) {
		assert((cast(DocumentFragment) this !is null) || (e.parentNode is this), e.toString);// e.parentNode ? e.parentNode.toString : "null");
		assert(e.parentDocument is this.parentDocument);
		assert(e is ret);
	}
body {
	if(e.parentNode !is null)
		e.parentNode.removeChild(e);

	selfClosed = false; // an element with children can no longer be self-closed
	e.parentNode = this;
	e.parentDocument = this.parentDocument;
	// a fragment contributes its children rather than itself
	if(auto frag = cast(DocumentFragment) e)
		children ~= frag.children;
	else
		children ~= e;

	sendObserverEvent(DomMutationOperations.appendChild, null, null, e);

	return e;
}

/++
	Inserts `what` into this node's children, right before the existing child `where`.

	`what` must not already have a parent. If `where` is not found among the
	children (only possible when contracts are compiled out), nothing is
	inserted. Returns `what`.
+/
Element insertBefore(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);

		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
body {
	foreach(i, e; children) {
		if(e is where) {
			if(auto frag = cast(DocumentFragment) what)
				children = children[0..i] ~ frag.children ~ children[i..$];
			else
				children = children[0..i] ~ what ~ children[i..$];
			what.parentDocument = this.parentDocument;
			what.parentNode = this;
			return what;
		}
	}

	// `where` was not among our children; the unreachable assert(0) that
	// used to follow this return has been dropped
	return what;
}

/++
	Inserts `what` into this node's children, immediately after the existing child `where`.

	`what` must not already have a parent. If `where` is not found among the
	children (only possible when contracts are compiled out), nothing is
	inserted. Returns `what`.
+/
Element insertAfter(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);
		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
body {
	foreach(i, e; children) {
		if(e is where) {
			if(auto frag = cast(DocumentFragment) what)
				children = children[0 .. i + 1] ~ frag.children ~ children[i + 1 .. $];
			else
				children = children[0 .. i + 1] ~ what ~ children[i + 1 .. $];
			what.parentNode = this;
			what.parentDocument = this.parentDocument;
			return what;
		}
	}

	// `where` was not among our children; see insertBefore
	return what;
}

/// swaps one child for a new thing. Returns the old child which is now parentless.
Element swapNode(Element child, Element replacement)
	in {
		assert(child !is null);
		assert(replacement !is null);
		assert(child.parentNode is this);
	}
	out(ret) {
		assert(ret is child);
		assert(ret.parentNode is null);
		assert(replacement.parentNode is this);
		assert(replacement.parentDocument is this.parentDocument);
	}
body {
	foreach(ref c; this.children)
		if(c is child) {
			c.parentNode = null; // detach the old child...
			c = replacement;     // ...then overwrite its slot in place
			c.parentNode = this;
			c.parentDocument = this.parentDocument;
			return child;
		}
	assert(0);
}


/++
	Appends the given text to the node, as a new text node.


	Calling `e.appendText(" hi")` on `<example>text <b>bold</b></example>`
	yields `<example>text <b>bold</b> hi</example>`.

	Returns this.

	See_Also:
		[firstInnerText], [directText], [innerText], [appendChild]
+/
@scriptable
Element appendText(string text) {
	Element e = new TextNode(parentDocument, text);
	appendChild(e);
	return this;
}

/++
	Returns child elements which are of a tag type (excludes text, comments, etc.).


	childElements of `<example>text <b>bold</b></example>` is just the `<b>` tag.

	Params:
		tagName = filter results to only the child elements with the given tag name.
+/
@property Element[] childElements(string tagName = null) {
	Element[] ret;
	foreach(c; children)
		if(c.nodeType == 1 && (tagName is null || c.tagName == tagName))
			ret ~= c;
	return ret;
}

/++
	Appends the given html to the element, returning the elements appended


	This is similar to `element.innerHTML += "html string";` in Javascript.
+/
@scriptable
Element[] appendHtml(string html) {
	// parse inside a throwaway wrapper, then move the parsed nodes over
	Document d = new Document("<root>" ~ html ~ "</root>");
	return stealChildren(d.root);
}


///.
void insertChildAfter(Element child, Element where)
	in {
		assert(child !is null);
		assert(where !is null);
		assert(where.parentNode is this);
		assert(!selfClosed);
		//assert(isInArray(where, children));
	}
	out {
		assert(child.parentNode is this);
		assert(where.parentNode is this);
		//assert(isInArray(where, children));
		//assert(isInArray(child, children));
	}
body {
	foreach(i, c; children) {
		if(c is where) {
			const at = i + 1; // insertion point: just past `where`
			if(auto frag = cast(DocumentFragment) child)
				children = children[0..at] ~ frag.children ~ children[at..$];
			else
				children = children[0..at] ~ child ~ children[at..$];
			child.parentNode = this;
			child.parentDocument = this.parentDocument;
			break;
		}
	}
}

/++
	Reparents all the child elements of `e` to `this`, leaving `e` childless.

	Params:
		e = the element whose children you want to steal
		position = an existing child element in `this` before which you want the stolen children to be inserted. If `null`, or if it is not actually one of our children, the stolen children are appended at the end of our current children.

	Returns: the stolen children.
+/
Element[] stealChildren(Element e, Element position = null)
	in {
		assert(!selfClosed);
		assert(e !is null);
		//if(position !is null)
			//assert(isInArray(position, children));
	}
	out (ret) {
		assert(e.children.length == 0);
		// all the parentNode is this checks fail because DocumentFragments do not appear in the parent tree, they are invisible...
		version(none)
		debug foreach(child; ret) {
			assert(child.parentNode is this);
			assert(child.parentDocument is this.parentDocument);
		}
	}
body {
	foreach(c; e.children) {
		c.parentNode = this;
		c.parentDocument = this.parentDocument;
	}

	bool inserted = false;
	if(position !is null) {
		foreach(i, child; children) {
			if(child is position) {
				children = children[0..i] ~
					e.children ~
					children[i..$];
				inserted = true;
				break;
			}
		}
	}
	// Also the fallback for a `position` that is not our child: previously
	// the nodes were reparented and removed from `e` but never added here.
	if(!inserted)
		children ~= e.children;

	auto ret = e.children[];
	e.children.length = 0;

	return ret;
}

/// Puts the given element first in our children list. The given element must not have a parent already.
/// A DocumentFragment contributes its children instead of itself.
Element prependChild(Element e)
	in {
		assert(e.parentNode is null);
		assert(!selfClosed);
	}
	out {
		assert(e.parentNode is this);
		assert(e.parentDocument is this.parentDocument);
		// a fragment is never itself placed in `children`, only its contents are
		assert((cast(DocumentFragment) e !is null) || children[0] is e);
	}
body {
	e.parentNode = this;
	e.parentDocument = this.parentDocument;
	if(auto frag = cast(DocumentFragment) e)
		children = frag.children ~ children;
	else
		children = e ~ children;
	return e;
}


/**
	Returns a string containing all child elements, formatted such that it could be pasted into
	an XML file.

	The children are written to `where`; only the portion written by this
	call is returned.
*/
@property string innerHTML(Appender!string where = appender!string()) const {
	if(children is null)
		return "";

	auto start = where.data.length;

	foreach(child; children) {
		assert(child !is null);

		child.writeToAppender(where);
	}

	return where.data[start .. $];
}

/**
	Takes some html and replaces the element's children with the tree made from the string.

	An empty string simply clears the children. Returns this.
*/
@property Element innerHTML(string html, bool strict = false) {
	if(html.length)
		selfClosed = false;

	if(html.length == 0) {
		// I often say innerHTML = ""; as a shortcut to clear it out,
		// so let's optimize that slightly.
		removeAllChildren();
		return this;
	}

	auto doc = new Document();
	doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document

	children = doc.root.children;
	foreach(c; children) {
		c.parentNode = this;
		c.parentDocument = this.parentDocument;
	}

	reparentTreeDocuments();

	// the temporary root must not keep referring to nodes we now own
	doc.root.children = null;

	return this;
}

/// ditto
@property Element innerHTML(Html html) {
	return this.innerHTML = html.source;
}

/// Points every node yielded by [tree] at this element's parentDocument.
private void reparentTreeDocuments() {
	foreach(c; this.tree)
		c.parentDocument = this.parentDocument;
}

/**
	Replaces this node with the given html string, which is parsed

	Note: this invalidates the this reference, since it is removed
	from the tree.

	Returns the new children that replace this.
*/
@property Element[] outerHTML(string html) {
	auto doc = new Document();
	doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness

	children = doc.root.children;
	foreach(c; children) {
		c.parentNode = this;
		c.parentDocument = this.parentDocument;
	}


	reparentTreeDocuments();


	stripOut();

	return doc.root.children;
}

/++
	Returns all the html for this element, including the tag itself.

	This is equivalent to calling toString().
+/
@property string outerHTML() {
	return this.toString();
}

/// This sets the inner content of the element *without* trying to parse it.
/// You can inject any code in there; this serves as an escape hatch from the dom.
///
/// The only times you might actually need it are for < style > and < script > tags in html.
/// Other than that, innerHTML and/or innerText should do the job.
@property void innerRawSource(string rawSource) {
	children.length = 0;

	auto raw = new RawSource(parentDocument, rawSource);
	raw.parentNode = this;
	children ~= raw;
}

/// Puts `replace` into the slot currently held by the child `find`.
/// `replace` must be parentless; `find` ends up parentless.
/// Returns `replace`; throws if `find` is not one of our children.
Element replaceChild(Element find, Element replace)
	in {
		assert(find !is null);
		assert(replace !is null);
		assert(replace.parentNode is null);
	}
	out(ret) {
		assert(ret is replace);
		assert(replace.parentNode is this);
		assert(replace.parentDocument is this.parentDocument);
		assert(find.parentNode is null);
	}
body {
	// FIXME
	//if(auto frag = cast(DocumentFragment) replace)
		//return this.replaceChild(frag, replace.children);
	foreach(ref slot; children) {
		if(slot !is find)
			continue;

		replace.parentNode = this;
		slot.parentNode = null;
		slot = replace;
		replace.parentDocument = this.parentDocument;
		return replace;
	}

	throw new Exception("no such child");
}

/**
	Replaces the given element with a whole group.

	An empty group just removes `find`. Throws if `find` is not one of
	our children.
*/
void replaceChild(Element find, Element[] replace)
	in {
		assert(find !is null);
		assert(replace !is null);
		assert(find.parentNode is this);
		debug foreach(r; replace)
			assert(r.parentNode is null);
	}
	out {
		assert(find.parentNode is null);
		assert(children.length >= replace.length);
		debug foreach(child; children)
			assert(child !is find);
		debug foreach(r; replace)
			assert(r.parentNode is this);
	}
body {
	if(replace.length == 0) {
		removeChild(find);
		return;
	}
	assert(replace.length);

	for(int idx = 0; idx < children.length; idx++) {
		if(children[idx] !is find)
			continue;

		children[idx].parentNode = null; // this element should now be dead
		children[idx] = replace[0];      // first replacement takes over the slot
		foreach(r; replace) {
			r.parentNode = this;
			r.parentDocument = this.parentDocument;
		}

		// the remaining replacements go right behind that slot
		children = .insertAfter(children, idx, replace[1..$]);

		return;
	}

	throw new Exception("no such child");
}


/**
	Removes the given child from this list.

	Returns the removed element, which is left without a parent.
	Throws if `c` is not one of our children.
*/
Element removeChild(Element c)
	in {
		assert(c !is null);
		assert(c.parentNode is this);
	}
	out {
		debug foreach(child; children)
			assert(child !is c);
		assert(c.parentNode is null);
	}
body {
	foreach(pos, candidate; children) {
		if(candidate !is c)
			continue;

		children = children[0..pos] ~ children [pos+1..$];
		c.parentNode = null;
		return c;
	}

	throw new Exception("no such child");
}

/// This removes all the children from this element, returning the old list.
3106 Element[] removeChildren() 3107 out (ret) { 3108 assert(children.length == 0); 3109 debug foreach(r; ret) 3110 assert(r.parentNode is null); 3111 } 3112 body { 3113 Element[] oldChildren = children.dup; 3114 foreach(c; oldChildren) 3115 c.parentNode = null; 3116 3117 children.length = 0; 3118 3119 return oldChildren; 3120 } 3121 3122 /** 3123 Fetch the inside text, with all tags stripped out. 3124 3125 <p>cool <b>api</b> & code dude<p> 3126 innerText of that is "cool api & code dude". 3127 3128 This does not match what real innerText does! 3129 http://perfectionkills.com/the-poor-misunderstood-innerText/ 3130 3131 It is more like textContent. 3132 */ 3133 @scriptable 3134 @property string innerText() const { 3135 string s; 3136 foreach(child; children) { 3137 if(child.nodeType != NodeType.Text) 3138 s ~= child.innerText; 3139 else 3140 s ~= child.nodeValue(); 3141 } 3142 return s; 3143 } 3144 3145 /// 3146 alias textContent = innerText; 3147 3148 /** 3149 Sets the inside text, replacing all children. You don't 3150 have to worry about entity encoding. 3151 */ 3152 @scriptable 3153 @property void innerText(string text) { 3154 selfClosed = false; 3155 Element e = new TextNode(parentDocument, text); 3156 e.parentNode = this; 3157 children = [e]; 3158 } 3159 3160 /** 3161 Strips this node out of the document, replacing it with the given text 3162 */ 3163 @property void outerText(string text) { 3164 parentNode.replaceChild(this, new TextNode(parentDocument, text)); 3165 } 3166 3167 /** 3168 Same result as innerText; the tag with all inner tags stripped out 3169 */ 3170 @property string outerText() const { 3171 return innerText; 3172 } 3173 3174 3175 /* ******************************* 3176 Miscellaneous 3177 *********************************/ 3178 3179 /// This is a full clone of the element. Alias for cloneNode(true) now. Don't extend it. 3180 @property Element cloned() 3181 /+ 3182 out(ret) { 3183 // FIXME: not sure why these fail... 
assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length));
			assert(ret.tagName == this.tagName);
		}
	body {
	+/
	{
		return this.cloneNode(true);
	}

	/// Clones the node. If deepClone is true, clone all inner tags too. If false, only do this tag (and its attributes), but it will have no contents.
	///
	/// The clone is created through [Element.make] with the same tag name, gets a duplicate of the
	/// attribute table (via `aadup`), and shares `parentDocument` and `selfClosed` with the original.
	Element cloneNode(bool deepClone) {
		auto e = Element.make(this.tagName);
		e.parentDocument = this.parentDocument;
		e.attributes = this.attributes.aadup;
		e.selfClosed = this.selfClosed;

		if(deepClone) {
			// children are always cloned deeply once we get here
			foreach(child; children) {
				e.appendChild(child.cloneNode(true));
			}
		}


		return e;
	}

	/// W3C DOM interface. Only really meaningful on [TextNode] instances, but the interface is present on the base class.
	/// The base implementation always returns the empty string.
	string nodeValue() const {
		return "";
	}

	// should return int
	///.
	/// The base implementation always returns 1.
	@property int nodeType() const {
		return 1;
	}


	// Sanity checks: the tag name holds no space and, in debug builds, no child is null or the element itself.
	invariant () {
		assert(tagName.indexOf(" ") == -1);

		if(children !is null)
		debug foreach(child; children) {
		//	assert(parentNode !is null);
			assert(child !is null);
		//	assert(child.parentNode is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parentNode is null ? "null" : child.parentNode.tagName));
			assert(child !is this);
			//assert(child !is parentNode);
		}

		/+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out
		if(parentNode !is null) {
			// if you have a parent, you should share the same parentDocument; this is appendChild()'s job
			auto lol = cast(TextNode) this;
			assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents);
		}
		+/
		//assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required
		// reason is so you can create these without needing a reference to the document
	}

	/**
		Turns the whole element, including tag, attributes, and children, into a string which could be pasted into
		an XML file.

		This simply forwards to [writeToAppender] with a fresh buffer.
	*/
	override string toString() const {
		return writeToAppender();
	}

	// Builds the line break plus indentation that precedes a pretty-printed tag.
	// Returns null when indentWith is null (pretty printing disabled). When insertComments is
	// set, the whitespace is wrapped in an html comment.
	protected string toPrettyStringIndent(bool insertComments, int indentationLevel, string indentWith) const {
		if(indentWith is null)
			return null;
		string s;

		if(insertComments) s ~= "<!--";
		s ~= "\n";
		foreach(indent; 0 .. indentationLevel)
			s ~= indentWith;
		if(insertComments) s ~= "-->";

		return s;
	}

	/++
		Writes out with formatting. Be warned: formatting changes the contents. Use ONLY
		for eyeball debugging.

		Params:
			insertComments = if true, the inserted whitespace is wrapped in html comments
			indentationLevel = how many times `indentWith` is repeated in front of this tag
			indentWith = the string used for one level of indentation
	+/
	string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {

		// first step is to concatenate any consecutive text nodes to simplify
		// the white space analysis. this changes the tree! but i'm allowed since
		// the comment always says it changes the comments
		//
		// actually i'm not allowed cuz it is const so i will cheat and lie
		/+
		TextNode lastTextChild = null;
		for(int a = 0; a < this.children.length; a++) {
			auto child = this.children[a];
			if(auto tn = cast(TextNode) child) {
				if(lastTextChild) {
					lastTextChild.contents ~= tn.contents;
					for(int b = a; b < this.children.length - 1; b++)
						this.children[b] = this.children[b + 1];
					this.children = this.children[0 .. $-1];
				} else {
					lastTextChild = tn;
				}
			} else {
				lastTextChild = null;
			}
		}
		+/

		// local working copy (shadows the member on purpose): runs of adjacent text nodes
		// are merged into one freshly allocated TextNode, so the real tree is left untouched
		const(Element)[] children;

		TextNode lastTextChild = null;
		for(int a = 0; a < this.children.length; a++) {
			auto child = this.children[a];
			if(auto tn = cast(const(TextNode)) child) {
				if(lastTextChild !is null) {
					lastTextChild.contents ~= tn.contents;
				} else {
					lastTextChild = new TextNode("");
					lastTextChild.parentNode = cast(Element) this;
					lastTextChild.contents ~= tn.contents;
					children ~= lastTextChild;
				}
			} else {
				lastTextChild = null;
				children ~= child;
			}
		}

		string s = toPrettyStringIndent(insertComments, indentationLevel, indentWith);

		s ~= "<";
		s ~= tagName;

		// i sort these for consistent output. might be more legible
		// but especially it keeps it the same for diff purposes.
		import std.algorithm : sort;
		auto keys = sort(attributes.keys);
		foreach(n; keys) {
			auto v = attributes[n];
			s ~= " ";
			s ~= n;
			s ~= "=\"";
			s ~= htmlEntitiesEncode(v);
			s ~= "\"";
		}

		if(selfClosed){
			s ~= " />";
			return s;
		}

		s ~= ">";

		// for simple `<collection><item>text</item><item>text</item></collection>`, let's
		// just keep them on the same line
		if(tagName.isInArray(inlineElements) || allAreInlineHtml(children)) {
			foreach(child; children) {
				s ~= child.toString();//toPrettyString(false, 0, null);
			}
		} else {
			foreach(child; children) {
				assert(child !is null);

				s ~= child.toPrettyString(insertComments, indentationLevel + 1, indentWith);
			}

			s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
		}

		s ~= "</";
		s ~= tagName;
		s ~= ">";

		return s;
	}

	/+
	/// Writes out the opening tag only, if applicable.
string writeTagOnly(Appender!string where = appender!string()) const {
	+/

	/// This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time.
	/// Note: the ordering of attributes in the string is officially undefined; the current implementation
	/// happens to emit them sorted by name, but do not rely on that.
	/// Returns the string it creates (only the part appended by this call, not the whole buffer).
	string writeToAppender(Appender!string where = appender!string()) const {
		assert(tagName !is null);

		// rough guess at the space needed, to cut down on reallocations
		where.reserve((this.children.length + 1) * 512);

		// remember where our output begins so we can return just our own slice
		auto start = where.data.length;

		where.put("<");
		where.put(tagName);

		import std.algorithm : sort;
		auto keys = sort(attributes.keys);
		foreach(n; keys) {
			auto v = attributes[n]; // I am sorting these for convenience with another project. order of AAs is undefined, so I'm allowed to do it.... and it is still undefined, I might change it back later.
			//assert(v !is null);
			where.put(" ");
			where.put(n);
			where.put("=\"");
			htmlEntitiesEncode(v, where);
			where.put("\"");
		}

		if(selfClosed){
			where.put(" />");
			return where.data[start .. $];
		}

		where.put('>');

		innerHTML(where);

		where.put("</");
		where.put(tagName);
		where.put('>');

		return where.data[start .. $];
	}

	/**
		Returns a lazy range of all its children, recursively.
	*/
	@property ElementStream tree() {
		return new ElementStream(this);
	}

	// I moved these from Form because they are generally useful.
	// Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here.
	/// Tags: HTML, HTML5
	// FIXME: add overloads for other label types...
/// Adds a `<label>` child holding a caption `<span>` and a form control, and returns the label element.
	///
	/// A `<textarea>` (with `rows="6"`) is created when `type` is "textarea", otherwise an `<input>` of the
	/// given type. For "checkbox" and "radio" the caption is placed after the control instead of before it.
	Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto fs = this;
		auto i = fs.addChild("label");

		if(!(type == "checkbox" || type == "radio"))
			i.addChild("span", label);

		Element input;
		if(type == "textarea")
			input = i.addChild("textarea").
			setAttribute("name", name).
			setAttribute("rows", "6");
		else
			input = i.addChild("input").
			setAttribute("name", name).
			setAttribute("type", type);

		if(type == "checkbox" || type == "radio")
			i.addChild("span", label);

		// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
		fieldOptions.applyToElement(input);
		return i;
	}

	/// Same as above, but the caption is an existing element which is appended to the new `<label>`
	/// before the control. Returns the label element.
	Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto fs = this;
		auto i = fs.addChild("label");
		i.addChild(label);
		Element input;
		if(type == "textarea")
			input = i.addChild("textarea").
			setAttribute("name", name).
			setAttribute("rows", "6");
		else
			input = i.addChild("input").
			setAttribute("name", name).
			setAttribute("type", type);

		// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
		fieldOptions.applyToElement(input);
		return i;
	}

	/// Convenience overload: a "text" field with the given options.
	Element addField(string label, string name, FormFieldOptions fieldOptions) {
		return addField(label, name, "text", fieldOptions);
	}

	/// Adds a `<label>` holding a caption `<span>` and a `<select>` with one `<option>` per entry of `options`.
	/// Returns the label element. Note that `fieldOptions` is currently not applied (see the FIXME below).
	Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto fs = this;
		auto i = fs.addChild("label");
		i.addChild("span", label);
		auto sel = i.addChild("select").setAttribute("name", name);

		foreach(k, opt; options)
			sel.addChild("option", opt, k);

		// FIXME: implement requirements somehow

		return i;
	}

	/// Adds a `<div class="submit-holder">` containing an `<input type="submit">` and returns the div.
	/// The button's value is only set when `label` is not empty.
	Element addSubmitButton(string label = null) {
		auto t = this;
		auto holder = t.addChild("div");
		holder.addClass("submit-holder");
		auto i = holder.addChild("input");
		i.type = "submit";
		if(label.length)
			i.value = label;
		return holder;
	}

}

// FIXME: since Document loosens the input requirements, it should probably be the sub class...
/// Specializes Document for handling generic XML. (always uses strict mode, uses xml mime type and file header)
/// Group: core_functionality
class XmlDocument : Document {
	/// Sets the xml content type and prolog, then parses `data` in strict mode.
	this(string data) {
		contentType = "text/xml; charset=utf-8";
		_prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n";

		parseStrict(data);
	}
}




import std.string;

/* domconvenience follows { */

/// finds comments that match the given txt. Case insensitive, strips whitespace.
/// Group: core_functionality
Element[] findComments(Document document, string txt) {
	return findComments(document.root, txt);
}

/// ditto
Element[] findComments(Element element, string txt) {
	// normalize the needle once; each comment gets the same treatment below
	txt = txt.strip().toLower();
	Element[] ret;

	foreach(comment; element.getElementsByTagName("#comment")) {
		string t = comment.nodeValue().strip().toLower();
		if(t == txt)
			ret ~= comment;
	}

	return ret;
}

/// An option type that propagates null. See: [Element.optionSelector]
/// Group: implementations
struct MaybeNullElement(SomeElementType) {
	/// Wraps `ele`, which may be null.
	this(SomeElementType ele) {
		this.element = ele;
	}
	SomeElementType element;

	/// Forwards to the element, with a null check inserted that propagates null.
	///
	/// Methods returning an Element (or subclass) give back another MaybeNullElement, methods
	/// returning string give back a null string, and void methods become a no-op when the
	/// wrapped element is null. Other return types are rejected at compile time.
	auto opDispatch(string method, T...)(T args) {
		alias type = typeof(__traits(getMember, element, method)(args));
		static if(is(type : Element)) {
			if(element is null)
				return MaybeNullElement!type(null);
			// wrap the result too: every return in an `auto` function must have the same type,
			// and wrapping is what lets the null check carry on down a chain of calls
			return MaybeNullElement!type(__traits(getMember, element, method)(args));
		} else static if(is(type == string)) {
			if(element is null)
				return cast(string) null;
			return __traits(getMember, element, method)(args);
		} else static if(is(type == void)) {
			if(element is null)
				return;
			__traits(getMember, element, method)(args);
		} else {
			static assert(0);
		}
	}

	/// Allows implicit casting to the wrapped element.
	alias element this;
}

/++
	A collection of elements which forwards methods to the children.
+/
/// Group: implementations
struct ElementCollection {
	/// Creates a collection holding just `e`.
	this(Element e) {
		elements = [e];
	}

	/// Creates a collection from everything under `e` that matches `selector`.
	this(Element e, string selector) {
		elements = e.querySelectorAll(selector);
	}

	/// Creates a collection around an existing array (the array is not copied).
	this(Element[] e) {
		elements = e;
	}

	Element[] elements;
	//alias elements this; // let it implicitly convert to the underlying array

	/// Runs `selector` against every member and gathers all the matches into a new collection.
	ElementCollection opIndex(string selector) {
		ElementCollection ec;
		foreach(e; elements)
			ec.elements ~= e.getElementsBySelector(selector);
		return ec;
	}

	/// Returns the i-th member of the collection.
	Element opIndex(int i) {
		return elements[i];
	}

	/// if you slice it, give the underlying array for easy forwarding of the
	/// collection to range expecting algorithms or looping over.
	Element[] opSlice() {
		return elements;
	}

	/// And input range primitives so we can foreach over this
	void popFront() {
		elements = elements[1..$];
	}

	/// ditto
	Element front() {
		return elements[0];
	}

	/// ditto
	bool empty() {
		return !elements.length;
	}

	/++
		Collects strings from the collection, concatenating them together
		Kinda like running reduce and ~= on it.

		Note that `separator` is appended after every member, including the last one.

		---
		document["p"].collect!"innerText";
		---
	+/
	string collect(string method)(string separator = "") {
		string text;
		foreach(e; elements) {
			text ~= mixin("e." ~ method);
			text ~= separator;
		}
		return text;
	}

	/// Forward method calls to each individual [Element|element] of the collection
	/// returns this so it can be chained.
	ElementCollection opDispatch(string name, T...)(T t) {
		foreach(e; elements) {
			mixin("e." ~ name)(t);
		}
		return this;
	}

	/++
		Calls [Element.wrapIn] on each member of the collection, but clones the argument `what` for each one.
	+/
	ElementCollection wrapIn(Element what) {
		foreach(e; elements) {
			// shallow clone: each member gets its own empty copy of the wrapper
			e.wrapIn(what.cloneNode(false));
		}

		return this;
	}

	/// Concatenates two ElementCollection together.
	ElementCollection opBinary(string op : "~")(ElementCollection rhs) {
		return ElementCollection(this.elements ~ rhs.elements);
	}
}


/// this puts in operators and opDispatch to handle string indexes and properties, forwarding to get and set functions.
///
/// The type it is mixed into must provide `string get(string)` and `string set(string, string)`.
/// Group: implementations
mixin template JavascriptStyleDispatch() {
	/// `obj.name` reads through `get`; `obj.name = v` (or `obj.name(v)`) with a non-null `v` writes through `set`.
	string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want.
		if(v !is null)
			return set(name, v);
		return get(name);
	}

	/// `obj["key"]` reads through `get`.
	string opIndex(string key) const {
		return get(key);
	}

	/// `obj["field"] = value` writes through `set`.
	string opIndexAssign(string value, string field) {
		return set(field, value);
	}

	// FIXME: doesn't seem to work
	// NOTE(review): `key in obj` is rewritten by the compiler to opBinaryRight, not opBinary, and
	// none of the structs in this file that mix this in appear to declare `fields` - confirm before relying on it.
	string* opBinary(string op)(string key)  if(op == "in") {
		return key in fields;
	}
}

/// A proxy object to do the Element class' dataset property. See Element.dataset for more info.
///
/// Do not create this object directly.
/// Group: implementations
struct DataSet {
	/// Wraps `e`; all reads and writes go to its `data-*` attributes.
	this(Element e) {
		this._element = e;
	}

	private Element _element;
	/// Sets the `data-` attribute for the camelCased `name` (e.g. `fooBar` becomes `data-foo-bar`) and returns `value`.
	string set(string name, string value) {
		_element.setAttribute("data-" ~ unCamelCase(name), value);
		return value;
	}

	/// Gets the `data-` attribute for the camelCased `name`.
	string get(string name) const {
		return _element.getAttribute("data-" ~ unCamelCase(name));
	}

	///
	mixin JavascriptStyleDispatch!();
}

/// Proxy object for attributes which will replace the main opDispatch eventually
/// Group: implementations
struct AttributeSet {
	/// Wraps `e`; all reads and writes go straight to its attributes.
	this(Element e) {
		this._element = e;
	}

	private Element _element;
	/// Sets attribute `name` to `value` and returns `value`.
	string set(string name, string value) {
		_element.setAttribute(name, value);
		return value;
	}

	/// Gets attribute `name`.
	string get(string name) const {
		return _element.getAttribute(name);
	}

	///
	mixin JavascriptStyleDispatch!();
}



/// for style, i want to be able to set it with a string like a plain attribute,
/// but also be able to do properties Javascript style.

/// Group: implementations
struct ElementStyle {
	this(Element parent) {
		_element = parent;
	}

	Element _element;

	// Gives a reference to the element's raw "style" attribute string, creating an
	// empty one first if the element has none so there is always something to reference.
	@property ref inout(string) _attribute() inout {
		auto s = "style" in _element.attributes;
		if(s is null) {
			auto e = cast() _element; // const_cast
			e.attributes["style"] = ""; // we need something to reference
			s = cast(inout) ("style" in e.attributes);
		}

		assert(s !is null);
		return *s;
	}

	alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work.

	/// Sets one css property (given camelCased, with `cssFloat` standing for `float`) and rewrites
	/// the whole style attribute from the parsed rules. Setting an empty or null value removes the rule.
	/// An empty `name` is ignored. Returns `value`.
	string set(string name, string value) {
		if(name.length == 0)
			return value;
		if(name == "cssFloat")
			name = "float";
		else
			name = unCamelCase(name);
		auto r = rules();
		r[name] = value;

		_attribute = "";
		foreach(k, v; r) {
			if(v is null || v.length == 0) /* css can't do empty rules anyway so we'll use that to remove */
				continue;
			if(_attribute.length)
				_attribute ~= " ";
			_attribute ~= k ~ ": " ~ v ~ ";";
		}

		_element.setAttribute("style", _attribute); // this is to trigger the observer call

		return value;
	}
	/// Gets one css property (given camelCased, with `cssFloat` standing for `float`), or null if it is not set.
	string get(string name) const {
		if(name == "cssFloat")
			name = "float";
		else
			name = unCamelCase(name);
		auto r = rules();
		if(name in r)
			return r[name];
		return null;
	}

	/// Parses the style attribute into a property-name => value table.
	/// Rules are split on `;` and then on the first `:`; a rule without a colon maps to the empty string.
	string[string] rules() const {
		string[string] ret;
		foreach(rule;  _attribute.split(";")) {
			rule = rule.strip();
			if(rule.length == 0)
				continue;
			auto idx = rule.indexOf(":");
			if(idx == -1)
				ret[rule] = "";
			else {
				auto name = rule[0 .. idx].strip();
				auto value = rule[idx + 1 .. $].strip();

				ret[name] = value;
			}
		}

		return ret;
	}

	mixin JavascriptStyleDispatch!();
}

/// Converts a camel cased propertyName to a css style dashed property-name
string unCamelCase(string a) {
	string ret;
	// only ASCII capitals are touched; every other code unit is copied through unchanged
	foreach(c; a)
		if((c >= 'A' && c <= 'Z'))
			ret ~= "-" ~ toLower("" ~ c)[0];
		else
			ret ~= c;
	return ret;
}

/// Translates a css style property-name to a camel cased propertyName
string camelCase(string a) {
	// local selective import so the dchar overload is picked unambiguously
	import std.uni : toUpper;

	string ret;
	bool justSawDash = false;
	// iterate by whole code point: upper-casing a lone UTF-8 code unit would
	// corrupt (or throw on) a non-ASCII character that follows a dash
	foreach(dchar c; a)
		if(c == '-') {
			justSawDash = true;
		} else {
			if(justSawDash) {
				justSawDash = false;
				ret ~= toUpper(c);
			} else
				ret ~= c;
		}
	return ret;
}









// domconvenience ends }











// @safe:

// NOTE: do *NOT* override toString on Element subclasses. It won't work.
// Instead, override writeToAppender();

// FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too.

// Stripping them is useful for reading php as html.... but adding them
// is good for building php.

// I need to maintain compatibility with the way it is now too.

import std.string;
import std.exception;
import std.uri;
import std.array;
import std.range;

//import std.stdio;

// tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh
// that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's
// most likely a typo so I say kill kill kill.
/++
	This might belong in another module, but it represents a file with a mime type and some data.
	Document implements this interface with type = text/html (see Document.contentType for more info)
	and data = document.toString, so you can return Documents anywhere web.d expects FileResources.
+/
/// Group: bonus_functionality
interface FileResource {
	/// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png"
	@property string contentType() const;
	/// the data
	immutable(ubyte)[] getData() const;
}




///.
/// Group: bonus_functionality
enum NodeType { Text = 3 }


/// You can use this to do an easy null check or a dynamic cast+null check on any element.
///
/// Throws: ElementNotFoundException if `e` is null or is not a `T`.
/// Group: core_functionality
T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element))
	in {}
	out(ret) { assert(ret !is null); }
body {
	auto ret = cast(T) e;
	if(ret is null)
		throw new ElementNotFoundException(T.stringof, "passed value", e, file, line);
	return ret;
}


///.
/// Group: core_functionality
class DocumentFragment : Element {
	///.
	this(Document _parentDocument) {
		tagName = "#fragment";
		super(_parentDocument);
	}

	/++
		Creates a document fragment from the given HTML. Note that the HTML is assumed to close all tags contained inside it.

		Since: March 29, 2018 (or git tagged v2.1.0)
	+/
	this(Html html) {
		this(null);

		this.innerHTML = html.source;
	}

	///.
	/// A fragment has no tag of its own, so only its contents are written.
	override string writeToAppender(Appender!string where = appender!string()) const {
		return this.innerHTML(where);
	}

	/// Pretty prints each child at the fragment's own indentation level; the fragment itself adds nothing.
	override string toPrettyString(bool insertComments, int indentationLevel, string indentWith) const {
		string s;
		foreach(child; children)
			s ~= child.toPrettyString(insertComments, indentationLevel, indentWith);
		return s;
	}

	/// DocumentFragments don't really exist in a dom, so they ignore themselves in parent nodes
	/*
	override inout(Element) parentNode() inout {
		return children.length ? children[0].parentNode : null;
	}
	*/
	override Element parentNode(Element p) {
		this._parentNode = p;
		// the children are reparented directly to p as well
		foreach(child; children)
			child.parentNode = p;
		return p;
	}
}

/// Given text, encode all html entities on it - &, <, >, and ". This function also
/// encodes all 8 bit characters as entities, thus ensuring the resultant text will work
/// even if your charset isn't set right. You can suppress this by setting encodeNonAscii = false
///
/// The output parameter can be given to append to an existing buffer. You don't have to
/// pass one; regardless, the return value will be usable for you, with just the data encoded.
/// Group: core_functionality
string htmlEntitiesEncode(string data, Appender!string output = appender!string(), bool encodeNonAscii = true) {
	// if there's no entities, we can save a lot of time by not bothering with the
	// encoding loop. This check cuts the net toString time by better than half in my test.
	// let me know if it made your tests worse though, since if you use an entity in just about
	// every location, the check will add time... but I suspect the average experience is like mine
	// since the check gives up as soon as it can anyway.

	bool shortcut = true;
	foreach(char c; data) {
		// non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it.
		if(c == '<' || c == '>' || c == '"' || c == '&' || (encodeNonAscii && cast(uint) c > 127)) {
			shortcut = false; // there's actual work to be done
			break;
		}
	}

	if(shortcut) {
		output.put(data);
		return data;
	}

	// remember where our output begins so we can return just our own slice of the buffer
	auto start = output.data.length;

	output.reserve(data.length + 64); // grab some extra space for the encoded entities

	foreach(dchar d; data) {
		if(d == '&')
			output.put("&amp;");
		else if (d == '<')
			output.put("&lt;");
		else if (d == '>')
			output.put("&gt;");
		else if (d == '\"')
			output.put("&quot;");
//		else if (d == '\'')
//			output.put("&#39;"); // if you are in an attribute, it might be important to encode for the same reason as double quotes
			// FIXME: should I encode apostrophes too? as &#39;... I could also do space but if your html is so bad that it doesn't
			// quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh
			// idk about apostrophes though. Might be worth it, might not.
		else if (!encodeNonAscii || (d < 128 && d > 0))
			output.put(d);
		else
			// everything else (non-ascii, and NUL) becomes a decimal numeric character reference
			output.put("&#" ~ std.conv.to!string(cast(int) d) ~ ";");
	}

	//assert(output !is null); // this fails on empty attributes.....
	return output.data[start .. $];

//	data = data.replace("\u00a0", "&nbsp;");
}

/// An alias for htmlEntitiesEncode; it works for xml too
/// Group: core_functionality
string xmlEntitiesEncode(string data) {
	return htmlEntitiesEncode(data);
}

/// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters.
4047 /// Group: core_functionality 4048 dchar parseEntity(in dchar[] entity) { 4049 switch(entity[1..$-1]) { 4050 case "quot": 4051 return '"'; 4052 case "apos": 4053 return '\''; 4054 case "lt": 4055 return '<'; 4056 case "gt": 4057 return '>'; 4058 case "amp": 4059 return '&'; 4060 // the next are html rather than xml 4061 4062 // Retrieved from https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references 4063 // Only entities that resolve to U+0009 ~ U+1D56B are stated. 4064 case "Tab": return '\u0009'; 4065 case "NewLine": return '\u000A'; 4066 case "excl": return '\u0021'; 4067 case "QUOT": return '\u0022'; 4068 case "num": return '\u0023'; 4069 case "dollar": return '\u0024'; 4070 case "percnt": return '\u0025'; 4071 case "AMP": return '\u0026'; 4072 case "lpar": return '\u0028'; 4073 case "rpar": return '\u0029'; 4074 case "ast": case "midast": return '\u002A'; 4075 case "plus": return '\u002B'; 4076 case "comma": return '\u002C'; 4077 case "period": return '\u002E'; 4078 case "sol": return '\u002F'; 4079 case "colon": return '\u003A'; 4080 case "semi": return '\u003B'; 4081 case "LT": return '\u003C'; 4082 case "equals": return '\u003D'; 4083 case "GT": return '\u003E'; 4084 case "quest": return '\u003F'; 4085 case "commat": return '\u0040'; 4086 case "lsqb": case "lbrack": return '\u005B'; 4087 case "bsol": return '\u005C'; 4088 case "rsqb": case "rbrack": return '\u005D'; 4089 case "Hat": return '\u005E'; 4090 case "lowbar": case "UnderBar": return '\u005F'; 4091 case "grave": case "DiacriticalGrave": return '\u0060'; 4092 case "lcub": case "lbrace": return '\u007B'; 4093 case "verbar": case "vert": case "VerticalLine": return '\u007C'; 4094 case "rcub": case "rbrace": return '\u007D'; 4095 case "nbsp": case "NonBreakingSpace": return '\u00A0'; 4096 case "iexcl": return '\u00A1'; 4097 case "cent": return '\u00A2'; 4098 case "pound": return '\u00A3'; 4099 case "curren": return '\u00A4'; 4100 case "yen": return '\u00A5'; 4101 case 
"brvbar": return '\u00A6'; 4102 case "sect": return '\u00A7'; 4103 case "Dot": case "die": case "DoubleDot": case "uml": return '\u00A8'; 4104 case "copy": case "COPY": return '\u00A9'; 4105 case "ordf": return '\u00AA'; 4106 case "laquo": return '\u00AB'; 4107 case "not": return '\u00AC'; 4108 case "shy": return '\u00AD'; 4109 case "reg": case "circledR": case "REG": return '\u00AE'; 4110 case "macr": case "strns": return '\u00AF'; 4111 case "deg": return '\u00B0'; 4112 case "plusmn": case "pm": case "PlusMinus": return '\u00B1'; 4113 case "sup2": return '\u00B2'; 4114 case "sup3": return '\u00B3'; 4115 case "acute": case "DiacriticalAcute": return '\u00B4'; 4116 case "micro": return '\u00B5'; 4117 case "para": return '\u00B6'; 4118 case "middot": case "centerdot": case "CenterDot": return '\u00B7'; 4119 case "cedil": case "Cedilla": return '\u00B8'; 4120 case "sup1": return '\u00B9'; 4121 case "ordm": return '\u00BA'; 4122 case "raquo": return '\u00BB'; 4123 case "frac14": return '\u00BC'; 4124 case "frac12": case "half": return '\u00BD'; 4125 case "frac34": return '\u00BE'; 4126 case "iquest": return '\u00BF'; 4127 case "Agrave": return '\u00C0'; 4128 case "Aacute": return '\u00C1'; 4129 case "Acirc": return '\u00C2'; 4130 case "Atilde": return '\u00C3'; 4131 case "Auml": return '\u00C4'; 4132 case "Aring": case "angst": return '\u00C5'; 4133 case "AElig": return '\u00C6'; 4134 case "Ccedil": return '\u00C7'; 4135 case "Egrave": return '\u00C8'; 4136 case "Eacute": return '\u00C9'; 4137 case "Ecirc": return '\u00CA'; 4138 case "Euml": return '\u00CB'; 4139 case "Igrave": return '\u00CC'; 4140 case "Iacute": return '\u00CD'; 4141 case "Icirc": return '\u00CE'; 4142 case "Iuml": return '\u00CF'; 4143 case "ETH": return '\u00D0'; 4144 case "Ntilde": return '\u00D1'; 4145 case "Ograve": return '\u00D2'; 4146 case "Oacute": return '\u00D3'; 4147 case "Ocirc": return '\u00D4'; 4148 case "Otilde": return '\u00D5'; 4149 case "Ouml": return '\u00D6'; 4150 case "times": 
return '\u00D7'; 4151 case "Oslash": return '\u00D8'; 4152 case "Ugrave": return '\u00D9'; 4153 case "Uacute": return '\u00DA'; 4154 case "Ucirc": return '\u00DB'; 4155 case "Uuml": return '\u00DC'; 4156 case "Yacute": return '\u00DD'; 4157 case "THORN": return '\u00DE'; 4158 case "szlig": return '\u00DF'; 4159 case "agrave": return '\u00E0'; 4160 case "aacute": return '\u00E1'; 4161 case "acirc": return '\u00E2'; 4162 case "atilde": return '\u00E3'; 4163 case "auml": return '\u00E4'; 4164 case "aring": return '\u00E5'; 4165 case "aelig": return '\u00E6'; 4166 case "ccedil": return '\u00E7'; 4167 case "egrave": return '\u00E8'; 4168 case "eacute": return '\u00E9'; 4169 case "ecirc": return '\u00EA'; 4170 case "euml": return '\u00EB'; 4171 case "igrave": return '\u00EC'; 4172 case "iacute": return '\u00ED'; 4173 case "icirc": return '\u00EE'; 4174 case "iuml": return '\u00EF'; 4175 case "eth": return '\u00F0'; 4176 case "ntilde": return '\u00F1'; 4177 case "ograve": return '\u00F2'; 4178 case "oacute": return '\u00F3'; 4179 case "ocirc": return '\u00F4'; 4180 case "otilde": return '\u00F5'; 4181 case "ouml": return '\u00F6'; 4182 case "divide": case "div": return '\u00F7'; 4183 case "oslash": return '\u00F8'; 4184 case "ugrave": return '\u00F9'; 4185 case "uacute": return '\u00FA'; 4186 case "ucirc": return '\u00FB'; 4187 case "uuml": return '\u00FC'; 4188 case "yacute": return '\u00FD'; 4189 case "thorn": return '\u00FE'; 4190 case "yuml": return '\u00FF'; 4191 case "Amacr": return '\u0100'; 4192 case "amacr": return '\u0101'; 4193 case "Abreve": return '\u0102'; 4194 case "abreve": return '\u0103'; 4195 case "Aogon": return '\u0104'; 4196 case "aogon": return '\u0105'; 4197 case "Cacute": return '\u0106'; 4198 case "cacute": return '\u0107'; 4199 case "Ccirc": return '\u0108'; 4200 case "ccirc": return '\u0109'; 4201 case "Cdot": return '\u010A'; 4202 case "cdot": return '\u010B'; 4203 case "Ccaron": return '\u010C'; 4204 case "ccaron": return '\u010D'; 4205 case 
"Dcaron": return '\u010E'; 4206 case "dcaron": return '\u010F'; 4207 case "Dstrok": return '\u0110'; 4208 case "dstrok": return '\u0111'; 4209 case "Emacr": return '\u0112'; 4210 case "emacr": return '\u0113'; 4211 case "Edot": return '\u0116'; 4212 case "edot": return '\u0117'; 4213 case "Eogon": return '\u0118'; 4214 case "eogon": return '\u0119'; 4215 case "Ecaron": return '\u011A'; 4216 case "ecaron": return '\u011B'; 4217 case "Gcirc": return '\u011C'; 4218 case "gcirc": return '\u011D'; 4219 case "Gbreve": return '\u011E'; 4220 case "gbreve": return '\u011F'; 4221 case "Gdot": return '\u0120'; 4222 case "gdot": return '\u0121'; 4223 case "Gcedil": return '\u0122'; 4224 case "Hcirc": return '\u0124'; 4225 case "hcirc": return '\u0125'; 4226 case "Hstrok": return '\u0126'; 4227 case "hstrok": return '\u0127'; 4228 case "Itilde": return '\u0128'; 4229 case "itilde": return '\u0129'; 4230 case "Imacr": return '\u012A'; 4231 case "imacr": return '\u012B'; 4232 case "Iogon": return '\u012E'; 4233 case "iogon": return '\u012F'; 4234 case "Idot": return '\u0130'; 4235 case "imath": case "inodot": return '\u0131'; 4236 case "IJlig": return '\u0132'; 4237 case "ijlig": return '\u0133'; 4238 case "Jcirc": return '\u0134'; 4239 case "jcirc": return '\u0135'; 4240 case "Kcedil": return '\u0136'; 4241 case "kcedil": return '\u0137'; 4242 case "kgreen": return '\u0138'; 4243 case "Lacute": return '\u0139'; 4244 case "lacute": return '\u013A'; 4245 case "Lcedil": return '\u013B'; 4246 case "lcedil": return '\u013C'; 4247 case "Lcaron": return '\u013D'; 4248 case "lcaron": return '\u013E'; 4249 case "Lmidot": return '\u013F'; 4250 case "lmidot": return '\u0140'; 4251 case "Lstrok": return '\u0141'; 4252 case "lstrok": return '\u0142'; 4253 case "Nacute": return '\u0143'; 4254 case "nacute": return '\u0144'; 4255 case "Ncedil": return '\u0145'; 4256 case "ncedil": return '\u0146'; 4257 case "Ncaron": return '\u0147'; 4258 case "ncaron": return '\u0148'; 4259 case "napos": 
return '\u0149'; 4260 case "ENG": return '\u014A'; 4261 case "eng": return '\u014B'; 4262 case "Omacr": return '\u014C'; 4263 case "omacr": return '\u014D'; 4264 case "Odblac": return '\u0150'; 4265 case "odblac": return '\u0151'; 4266 case "OElig": return '\u0152'; 4267 case "oelig": return '\u0153'; 4268 case "Racute": return '\u0154'; 4269 case "racute": return '\u0155'; 4270 case "Rcedil": return '\u0156'; 4271 case "rcedil": return '\u0157'; 4272 case "Rcaron": return '\u0158'; 4273 case "rcaron": return '\u0159'; 4274 case "Sacute": return '\u015A'; 4275 case "sacute": return '\u015B'; 4276 case "Scirc": return '\u015C'; 4277 case "scirc": return '\u015D'; 4278 case "Scedil": return '\u015E'; 4279 case "scedil": return '\u015F'; 4280 case "Scaron": return '\u0160'; 4281 case "scaron": return '\u0161'; 4282 case "Tcedil": return '\u0162'; 4283 case "tcedil": return '\u0163'; 4284 case "Tcaron": return '\u0164'; 4285 case "tcaron": return '\u0165'; 4286 case "Tstrok": return '\u0166'; 4287 case "tstrok": return '\u0167'; 4288 case "Utilde": return '\u0168'; 4289 case "utilde": return '\u0169'; 4290 case "Umacr": return '\u016A'; 4291 case "umacr": return '\u016B'; 4292 case "Ubreve": return '\u016C'; 4293 case "ubreve": return '\u016D'; 4294 case "Uring": return '\u016E'; 4295 case "uring": return '\u016F'; 4296 case "Udblac": return '\u0170'; 4297 case "udblac": return '\u0171'; 4298 case "Uogon": return '\u0172'; 4299 case "uogon": return '\u0173'; 4300 case "Wcirc": return '\u0174'; 4301 case "wcirc": return '\u0175'; 4302 case "Ycirc": return '\u0176'; 4303 case "ycirc": return '\u0177'; 4304 case "Yuml": return '\u0178'; 4305 case "Zacute": return '\u0179'; 4306 case "zacute": return '\u017A'; 4307 case "Zdot": return '\u017B'; 4308 case "zdot": return '\u017C'; 4309 case "Zcaron": return '\u017D'; 4310 case "zcaron": return '\u017E'; 4311 case "fnof": return '\u0192'; 4312 case "imped": return '\u01B5'; 4313 case "gacute": return '\u01F5'; 4314 case 
"jmath": return '\u0237'; 4315 case "circ": return '\u02C6'; 4316 case "caron": case "Hacek": return '\u02C7'; 4317 case "breve": case "Breve": return '\u02D8'; 4318 case "dot": case "DiacriticalDot": return '\u02D9'; 4319 case "ring": return '\u02DA'; 4320 case "ogon": return '\u02DB'; 4321 case "tilde": case "DiacriticalTilde": return '\u02DC'; 4322 case "dblac": case "DiacriticalDoubleAcute": return '\u02DD'; 4323 case "DownBreve": return '\u0311'; 4324 case "Alpha": return '\u0391'; 4325 case "Beta": return '\u0392'; 4326 case "Gamma": return '\u0393'; 4327 case "Delta": return '\u0394'; 4328 case "Epsilon": return '\u0395'; 4329 case "Zeta": return '\u0396'; 4330 case "Eta": return '\u0397'; 4331 case "Theta": return '\u0398'; 4332 case "Iota": return '\u0399'; 4333 case "Kappa": return '\u039A'; 4334 case "Lambda": return '\u039B'; 4335 case "Mu": return '\u039C'; 4336 case "Nu": return '\u039D'; 4337 case "Xi": return '\u039E'; 4338 case "Omicron": return '\u039F'; 4339 case "Pi": return '\u03A0'; 4340 case "Rho": return '\u03A1'; 4341 case "Sigma": return '\u03A3'; 4342 case "Tau": return '\u03A4'; 4343 case "Upsilon": return '\u03A5'; 4344 case "Phi": return '\u03A6'; 4345 case "Chi": return '\u03A7'; 4346 case "Psi": return '\u03A8'; 4347 case "Omega": case "ohm": return '\u03A9'; 4348 case "alpha": return '\u03B1'; 4349 case "beta": return '\u03B2'; 4350 case "gamma": return '\u03B3'; 4351 case "delta": return '\u03B4'; 4352 case "epsi": case "epsilon": return '\u03B5'; 4353 case "zeta": return '\u03B6'; 4354 case "eta": return '\u03B7'; 4355 case "theta": return '\u03B8'; 4356 case "iota": return '\u03B9'; 4357 case "kappa": return '\u03BA'; 4358 case "lambda": return '\u03BB'; 4359 case "mu": return '\u03BC'; 4360 case "nu": return '\u03BD'; 4361 case "xi": return '\u03BE'; 4362 case "omicron": return '\u03BF'; 4363 case "pi": return '\u03C0'; 4364 case "rho": return '\u03C1'; 4365 case "sigmav": case "varsigma": case "sigmaf": return '\u03C2'; 4366 
case "sigma": return '\u03C3'; 4367 case "tau": return '\u03C4'; 4368 case "upsi": case "upsilon": return '\u03C5'; 4369 case "phi": return '\u03C6'; 4370 case "chi": return '\u03C7'; 4371 case "psi": return '\u03C8'; 4372 case "omega": return '\u03C9'; 4373 case "thetav": case "vartheta": case "thetasym": return '\u03D1'; 4374 case "Upsi": case "upsih": return '\u03D2'; 4375 case "straightphi": case "phiv": case "varphi": return '\u03D5'; 4376 case "piv": case "varpi": return '\u03D6'; 4377 case "Gammad": return '\u03DC'; 4378 case "gammad": case "digamma": return '\u03DD'; 4379 case "kappav": case "varkappa": return '\u03F0'; 4380 case "rhov": case "varrho": return '\u03F1'; 4381 case "epsiv": case "varepsilon": case "straightepsilon": return '\u03F5'; 4382 case "bepsi": case "backepsilon": return '\u03F6'; 4383 case "IOcy": return '\u0401'; 4384 case "DJcy": return '\u0402'; 4385 case "GJcy": return '\u0403'; 4386 case "Jukcy": return '\u0404'; 4387 case "DScy": return '\u0405'; 4388 case "Iukcy": return '\u0406'; 4389 case "YIcy": return '\u0407'; 4390 case "Jsercy": return '\u0408'; 4391 case "LJcy": return '\u0409'; 4392 case "NJcy": return '\u040A'; 4393 case "TSHcy": return '\u040B'; 4394 case "KJcy": return '\u040C'; 4395 case "Ubrcy": return '\u040E'; 4396 case "DZcy": return '\u040F'; 4397 case "Acy": return '\u0410'; 4398 case "Bcy": return '\u0411'; 4399 case "Vcy": return '\u0412'; 4400 case "Gcy": return '\u0413'; 4401 case "Dcy": return '\u0414'; 4402 case "IEcy": return '\u0415'; 4403 case "ZHcy": return '\u0416'; 4404 case "Zcy": return '\u0417'; 4405 case "Icy": return '\u0418'; 4406 case "Jcy": return '\u0419'; 4407 case "Kcy": return '\u041A'; 4408 case "Lcy": return '\u041B'; 4409 case "Mcy": return '\u041C'; 4410 case "Ncy": return '\u041D'; 4411 case "Ocy": return '\u041E'; 4412 case "Pcy": return '\u041F'; 4413 case "Rcy": return '\u0420'; 4414 case "Scy": return '\u0421'; 4415 case "Tcy": return '\u0422'; 4416 case "Ucy": return '\u0423'; 
4417 case "Fcy": return '\u0424'; 4418 case "KHcy": return '\u0425'; 4419 case "TScy": return '\u0426'; 4420 case "CHcy": return '\u0427'; 4421 case "SHcy": return '\u0428'; 4422 case "SHCHcy": return '\u0429'; 4423 case "HARDcy": return '\u042A'; 4424 case "Ycy": return '\u042B'; 4425 case "SOFTcy": return '\u042C'; 4426 case "Ecy": return '\u042D'; 4427 case "YUcy": return '\u042E'; 4428 case "YAcy": return '\u042F'; 4429 case "acy": return '\u0430'; 4430 case "bcy": return '\u0431'; 4431 case "vcy": return '\u0432'; 4432 case "gcy": return '\u0433'; 4433 case "dcy": return '\u0434'; 4434 case "iecy": return '\u0435'; 4435 case "zhcy": return '\u0436'; 4436 case "zcy": return '\u0437'; 4437 case "icy": return '\u0438'; 4438 case "jcy": return '\u0439'; 4439 case "kcy": return '\u043A'; 4440 case "lcy": return '\u043B'; 4441 case "mcy": return '\u043C'; 4442 case "ncy": return '\u043D'; 4443 case "ocy": return '\u043E'; 4444 case "pcy": return '\u043F'; 4445 case "rcy": return '\u0440'; 4446 case "scy": return '\u0441'; 4447 case "tcy": return '\u0442'; 4448 case "ucy": return '\u0443'; 4449 case "fcy": return '\u0444'; 4450 case "khcy": return '\u0445'; 4451 case "tscy": return '\u0446'; 4452 case "chcy": return '\u0447'; 4453 case "shcy": return '\u0448'; 4454 case "shchcy": return '\u0449'; 4455 case "hardcy": return '\u044A'; 4456 case "ycy": return '\u044B'; 4457 case "softcy": return '\u044C'; 4458 case "ecy": return '\u044D'; 4459 case "yucy": return '\u044E'; 4460 case "yacy": return '\u044F'; 4461 case "iocy": return '\u0451'; 4462 case "djcy": return '\u0452'; 4463 case "gjcy": return '\u0453'; 4464 case "jukcy": return '\u0454'; 4465 case "dscy": return '\u0455'; 4466 case "iukcy": return '\u0456'; 4467 case "yicy": return '\u0457'; 4468 case "jsercy": return '\u0458'; 4469 case "ljcy": return '\u0459'; 4470 case "njcy": return '\u045A'; 4471 case "tshcy": return '\u045B'; 4472 case "kjcy": return '\u045C'; 4473 case "ubrcy": return '\u045E'; 4474 case 
"dzcy": return '\u045F'; 4475 case "ensp": return '\u2002'; 4476 case "emsp": return '\u2003'; 4477 case "emsp13": return '\u2004'; 4478 case "emsp14": return '\u2005'; 4479 case "numsp": return '\u2007'; 4480 case "puncsp": return '\u2008'; 4481 case "thinsp": case "ThinSpace": return '\u2009'; 4482 case "hairsp": case "VeryThinSpace": return '\u200A'; 4483 case "ZeroWidthSpace": case "NegativeVeryThinSpace": case "NegativeThinSpace": case "NegativeMediumSpace": case "NegativeThickSpace": return '\u200B'; 4484 case "zwnj": return '\u200C'; 4485 case "zwj": return '\u200D'; 4486 case "lrm": return '\u200E'; 4487 case "rlm": return '\u200F'; 4488 case "hyphen": case "dash": return '\u2010'; 4489 case "ndash": return '\u2013'; 4490 case "mdash": return '\u2014'; 4491 case "horbar": return '\u2015'; 4492 case "Verbar": case "Vert": return '\u2016'; 4493 case "lsquo": case "OpenCurlyQuote": return '\u2018'; 4494 case "rsquo": case "rsquor": case "CloseCurlyQuote": return '\u2019'; 4495 case "lsquor": case "sbquo": return '\u201A'; 4496 case "ldquo": case "OpenCurlyDoubleQuote": return '\u201C'; 4497 case "rdquo": case "rdquor": case "CloseCurlyDoubleQuote": return '\u201D'; 4498 case "ldquor": case "bdquo": return '\u201E'; 4499 case "dagger": return '\u2020'; 4500 case "Dagger": case "ddagger": return '\u2021'; 4501 case "bull": case "bullet": return '\u2022'; 4502 case "nldr": return '\u2025'; 4503 case "hellip": case "mldr": return '\u2026'; 4504 case "permil": return '\u2030'; 4505 case "pertenk": return '\u2031'; 4506 case "prime": return '\u2032'; 4507 case "Prime": return '\u2033'; 4508 case "tprime": return '\u2034'; 4509 case "bprime": case "backprime": return '\u2035'; 4510 case "lsaquo": return '\u2039'; 4511 case "rsaquo": return '\u203A'; 4512 case "oline": case "OverBar": return '\u203E'; 4513 case "caret": return '\u2041'; 4514 case "hybull": return '\u2043'; 4515 case "frasl": return '\u2044'; 4516 case "bsemi": return '\u204F'; 4517 case "qprime": 
return '\u2057'; 4518 case "MediumSpace": return '\u205F'; 4519 case "NoBreak": return '\u2060'; 4520 case "ApplyFunction": case "af": return '\u2061'; 4521 case "InvisibleTimes": case "it": return '\u2062'; 4522 case "InvisibleComma": case "ic": return '\u2063'; 4523 case "euro": return '\u20AC'; 4524 case "tdot": case "TripleDot": return '\u20DB'; 4525 case "DotDot": return '\u20DC'; 4526 case "Copf": case "complexes": return '\u2102'; 4527 case "incare": return '\u2105'; 4528 case "gscr": return '\u210A'; 4529 case "hamilt": case "HilbertSpace": case "Hscr": return '\u210B'; 4530 case "Hfr": case "Poincareplane": return '\u210C'; 4531 case "quaternions": case "Hopf": return '\u210D'; 4532 case "planckh": return '\u210E'; 4533 case "planck": case "hbar": case "plankv": case "hslash": return '\u210F'; 4534 case "Iscr": case "imagline": return '\u2110'; 4535 case "image": case "Im": case "imagpart": case "Ifr": return '\u2111'; 4536 case "Lscr": case "lagran": case "Laplacetrf": return '\u2112'; 4537 case "ell": return '\u2113'; 4538 case "Nopf": case "naturals": return '\u2115'; 4539 case "numero": return '\u2116'; 4540 case "copysr": return '\u2117'; 4541 case "weierp": case "wp": return '\u2118'; 4542 case "Popf": case "primes": return '\u2119'; 4543 case "rationals": case "Qopf": return '\u211A'; 4544 case "Rscr": case "realine": return '\u211B'; 4545 case "real": case "Re": case "realpart": case "Rfr": return '\u211C'; 4546 case "reals": case "Ropf": return '\u211D'; 4547 case "rx": return '\u211E'; 4548 case "trade": case "TRADE": return '\u2122'; 4549 case "integers": case "Zopf": return '\u2124'; 4550 case "mho": return '\u2127'; 4551 case "Zfr": case "zeetrf": return '\u2128'; 4552 case "iiota": return '\u2129'; 4553 case "bernou": case "Bernoullis": case "Bscr": return '\u212C'; 4554 case "Cfr": case "Cayleys": return '\u212D'; 4555 case "escr": return '\u212F'; 4556 case "Escr": case "expectation": return '\u2130'; 4557 case "Fscr": case "Fouriertrf": 
return '\u2131'; 4558 case "phmmat": case "Mellintrf": case "Mscr": return '\u2133'; 4559 case "order": case "orderof": case "oscr": return '\u2134'; 4560 case "alefsym": case "aleph": return '\u2135'; 4561 case "beth": return '\u2136'; 4562 case "gimel": return '\u2137'; 4563 case "daleth": return '\u2138'; 4564 case "CapitalDifferentialD": case "DD": return '\u2145'; 4565 case "DifferentialD": case "dd": return '\u2146'; 4566 case "ExponentialE": case "exponentiale": case "ee": return '\u2147'; 4567 case "ImaginaryI": case "ii": return '\u2148'; 4568 case "frac13": return '\u2153'; 4569 case "frac23": return '\u2154'; 4570 case "frac15": return '\u2155'; 4571 case "frac25": return '\u2156'; 4572 case "frac35": return '\u2157'; 4573 case "frac45": return '\u2158'; 4574 case "frac16": return '\u2159'; 4575 case "frac56": return '\u215A'; 4576 case "frac18": return '\u215B'; 4577 case "frac38": return '\u215C'; 4578 case "frac58": return '\u215D'; 4579 case "frac78": return '\u215E'; 4580 case "larr": case "leftarrow": case "LeftArrow": case "slarr": case "ShortLeftArrow": return '\u2190'; 4581 case "uarr": case "uparrow": case "UpArrow": case "ShortUpArrow": return '\u2191'; 4582 case "rarr": case "rightarrow": case "RightArrow": case "srarr": case "ShortRightArrow": return '\u2192'; 4583 case "darr": case "downarrow": case "DownArrow": case "ShortDownArrow": return '\u2193'; 4584 case "harr": case "leftrightarrow": case "LeftRightArrow": return '\u2194'; 4585 case "varr": case "updownarrow": case "UpDownArrow": return '\u2195'; 4586 case "nwarr": case "UpperLeftArrow": case "nwarrow": return '\u2196'; 4587 case "nearr": case "UpperRightArrow": case "nearrow": return '\u2197'; 4588 case "searr": case "searrow": case "LowerRightArrow": return '\u2198'; 4589 case "swarr": case "swarrow": case "LowerLeftArrow": return '\u2199'; 4590 case "nlarr": case "nleftarrow": return '\u219A'; 4591 case "nrarr": case "nrightarrow": return '\u219B'; 4592 case "rarrw": case 
"rightsquigarrow": return '\u219D'; 4593 case "Larr": case "twoheadleftarrow": return '\u219E'; 4594 case "Uarr": return '\u219F'; 4595 case "Rarr": case "twoheadrightarrow": return '\u21A0'; 4596 case "Darr": return '\u21A1'; 4597 case "larrtl": case "leftarrowtail": return '\u21A2'; 4598 case "rarrtl": case "rightarrowtail": return '\u21A3'; 4599 case "LeftTeeArrow": case "mapstoleft": return '\u21A4'; 4600 case "UpTeeArrow": case "mapstoup": return '\u21A5'; 4601 case "map": case "RightTeeArrow": case "mapsto": return '\u21A6'; 4602 case "DownTeeArrow": case "mapstodown": return '\u21A7'; 4603 case "larrhk": case "hookleftarrow": return '\u21A9'; 4604 case "rarrhk": case "hookrightarrow": return '\u21AA'; 4605 case "larrlp": case "looparrowleft": return '\u21AB'; 4606 case "rarrlp": case "looparrowright": return '\u21AC'; 4607 case "harrw": case "leftrightsquigarrow": return '\u21AD'; 4608 case "nharr": case "nleftrightarrow": return '\u21AE'; 4609 case "lsh": case "Lsh": return '\u21B0'; 4610 case "rsh": case "Rsh": return '\u21B1'; 4611 case "ldsh": return '\u21B2'; 4612 case "rdsh": return '\u21B3'; 4613 case "crarr": return '\u21B5'; 4614 case "cularr": case "curvearrowleft": return '\u21B6'; 4615 case "curarr": case "curvearrowright": return '\u21B7'; 4616 case "olarr": case "circlearrowleft": return '\u21BA'; 4617 case "orarr": case "circlearrowright": return '\u21BB'; 4618 case "lharu": case "LeftVector": case "leftharpoonup": return '\u21BC'; 4619 case "lhard": case "leftharpoondown": case "DownLeftVector": return '\u21BD'; 4620 case "uharr": case "upharpoonright": case "RightUpVector": return '\u21BE'; 4621 case "uharl": case "upharpoonleft": case "LeftUpVector": return '\u21BF'; 4622 case "rharu": case "RightVector": case "rightharpoonup": return '\u21C0'; 4623 case "rhard": case "rightharpoondown": case "DownRightVector": return '\u21C1'; 4624 case "dharr": case "RightDownVector": case "downharpoonright": return '\u21C2'; 4625 case "dharl": case 
"LeftDownVector": case "downharpoonleft": return '\u21C3'; 4626 case "rlarr": case "rightleftarrows": case "RightArrowLeftArrow": return '\u21C4'; 4627 case "udarr": case "UpArrowDownArrow": return '\u21C5'; 4628 case "lrarr": case "leftrightarrows": case "LeftArrowRightArrow": return '\u21C6'; 4629 case "llarr": case "leftleftarrows": return '\u21C7'; 4630 case "uuarr": case "upuparrows": return '\u21C8'; 4631 case "rrarr": case "rightrightarrows": return '\u21C9'; 4632 case "ddarr": case "downdownarrows": return '\u21CA'; 4633 case "lrhar": case "ReverseEquilibrium": case "leftrightharpoons": return '\u21CB'; 4634 case "rlhar": case "rightleftharpoons": case "Equilibrium": return '\u21CC'; 4635 case "nlArr": case "nLeftarrow": return '\u21CD'; 4636 case "nhArr": case "nLeftrightarrow": return '\u21CE'; 4637 case "nrArr": case "nRightarrow": return '\u21CF'; 4638 case "lArr": case "Leftarrow": case "DoubleLeftArrow": return '\u21D0'; 4639 case "uArr": case "Uparrow": case "DoubleUpArrow": return '\u21D1'; 4640 case "rArr": case "Rightarrow": case "Implies": case "DoubleRightArrow": return '\u21D2'; 4641 case "dArr": case "Downarrow": case "DoubleDownArrow": return '\u21D3'; 4642 case "hArr": case "Leftrightarrow": case "DoubleLeftRightArrow": case "iff": return '\u21D4'; 4643 case "vArr": case "Updownarrow": case "DoubleUpDownArrow": return '\u21D5'; 4644 case "nwArr": return '\u21D6'; 4645 case "neArr": return '\u21D7'; 4646 case "seArr": return '\u21D8'; 4647 case "swArr": return '\u21D9'; 4648 case "lAarr": case "Lleftarrow": return '\u21DA'; 4649 case "rAarr": case "Rrightarrow": return '\u21DB'; 4650 case "zigrarr": return '\u21DD'; 4651 case "larrb": case "LeftArrowBar": return '\u21E4'; 4652 case "rarrb": case "RightArrowBar": return '\u21E5'; 4653 case "duarr": case "DownArrowUpArrow": return '\u21F5'; 4654 case "loarr": return '\u21FD'; 4655 case "roarr": return '\u21FE'; 4656 case "hoarr": return '\u21FF'; 4657 case "forall": case "ForAll": return 
'\u2200'; 4658 case "comp": case "complement": return '\u2201'; 4659 case "part": case "PartialD": return '\u2202'; 4660 case "exist": case "Exists": return '\u2203'; 4661 case "nexist": case "NotExists": case "nexists": return '\u2204'; 4662 case "empty": case "emptyset": case "emptyv": case "varnothing": return '\u2205'; 4663 case "nabla": case "Del": return '\u2207'; 4664 case "isin": case "isinv": case "Element": case "in": return '\u2208'; 4665 case "notin": case "NotElement": case "notinva": return '\u2209'; 4666 case "niv": case "ReverseElement": case "ni": case "SuchThat": return '\u220B'; 4667 case "notni": case "notniva": case "NotReverseElement": return '\u220C'; 4668 case "prod": case "Product": return '\u220F'; 4669 case "coprod": case "Coproduct": return '\u2210'; 4670 case "sum": case "Sum": return '\u2211'; 4671 case "minus": return '\u2212'; 4672 case "mnplus": case "mp": case "MinusPlus": return '\u2213'; 4673 case "plusdo": case "dotplus": return '\u2214'; 4674 case "setmn": case "setminus": case "Backslash": case "ssetmn": case "smallsetminus": return '\u2216'; 4675 case "lowast": return '\u2217'; 4676 case "compfn": case "SmallCircle": return '\u2218'; 4677 case "radic": case "Sqrt": return '\u221A'; 4678 case "prop": case "propto": case "Proportional": case "vprop": case "varpropto": return '\u221D'; 4679 case "infin": return '\u221E'; 4680 case "angrt": return '\u221F'; 4681 case "ang": case "angle": return '\u2220'; 4682 case "angmsd": case "measuredangle": return '\u2221'; 4683 case "angsph": return '\u2222'; 4684 case "mid": case "VerticalBar": case "smid": case "shortmid": return '\u2223'; 4685 case "nmid": case "NotVerticalBar": case "nsmid": case "nshortmid": return '\u2224'; 4686 case "par": case "parallel": case "DoubleVerticalBar": case "spar": case "shortparallel": return '\u2225'; 4687 case "npar": case "nparallel": case "NotDoubleVerticalBar": case "nspar": case "nshortparallel": return '\u2226'; 4688 case "and": case "wedge": 
return '\u2227'; 4689 case "or": case "vee": return '\u2228'; 4690 case "cap": return '\u2229'; 4691 case "cup": return '\u222A'; 4692 case "int": case "Integral": return '\u222B'; 4693 case "Int": return '\u222C'; 4694 case "tint": case "iiint": return '\u222D'; 4695 case "conint": case "oint": case "ContourIntegral": return '\u222E'; 4696 case "Conint": case "DoubleContourIntegral": return '\u222F'; 4697 case "Cconint": return '\u2230'; 4698 case "cwint": return '\u2231'; 4699 case "cwconint": case "ClockwiseContourIntegral": return '\u2232'; 4700 case "awconint": case "CounterClockwiseContourIntegral": return '\u2233'; 4701 case "there4": case "therefore": case "Therefore": return '\u2234'; 4702 case "becaus": case "because": case "Because": return '\u2235'; 4703 case "ratio": return '\u2236'; 4704 case "Colon": case "Proportion": return '\u2237'; 4705 case "minusd": case "dotminus": return '\u2238'; 4706 case "mDDot": return '\u223A'; 4707 case "homtht": return '\u223B'; 4708 case "sim": case "Tilde": case "thksim": case "thicksim": return '\u223C'; 4709 case "bsim": case "backsim": return '\u223D'; 4710 case "ac": case "mstpos": return '\u223E'; 4711 case "acd": return '\u223F'; 4712 case "wreath": case "VerticalTilde": case "wr": return '\u2240'; 4713 case "nsim": case "NotTilde": return '\u2241'; 4714 case "esim": case "EqualTilde": case "eqsim": return '\u2242'; 4715 case "sime": case "TildeEqual": case "simeq": return '\u2243'; 4716 case "nsime": case "nsimeq": case "NotTildeEqual": return '\u2244'; 4717 case "cong": case "TildeFullEqual": return '\u2245'; 4718 case "simne": return '\u2246'; 4719 case "ncong": case "NotTildeFullEqual": return '\u2247'; 4720 case "asymp": case "ap": case "TildeTilde": case "approx": case "thkap": case "thickapprox": return '\u2248'; 4721 case "nap": case "NotTildeTilde": case "napprox": return '\u2249'; 4722 case "ape": case "approxeq": return '\u224A'; 4723 case "apid": return '\u224B'; 4724 case "bcong": case "backcong": 
return '\u224C'; 4725 case "asympeq": case "CupCap": return '\u224D'; 4726 case "bump": case "HumpDownHump": case "Bumpeq": return '\u224E'; 4727 case "bumpe": case "HumpEqual": case "bumpeq": return '\u224F'; 4728 case "esdot": case "DotEqual": case "doteq": return '\u2250'; 4729 case "eDot": case "doteqdot": return '\u2251'; 4730 case "efDot": case "fallingdotseq": return '\u2252'; 4731 case "erDot": case "risingdotseq": return '\u2253'; 4732 case "colone": case "coloneq": case "Assign": return '\u2254'; 4733 case "ecolon": case "eqcolon": return '\u2255'; 4734 case "ecir": case "eqcirc": return '\u2256'; 4735 case "cire": case "circeq": return '\u2257'; 4736 case "wedgeq": return '\u2259'; 4737 case "veeeq": return '\u225A'; 4738 case "trie": case "triangleq": return '\u225C'; 4739 case "equest": case "questeq": return '\u225F'; 4740 case "ne": case "NotEqual": return '\u2260'; 4741 case "equiv": case "Congruent": return '\u2261'; 4742 case "nequiv": case "NotCongruent": return '\u2262'; 4743 case "le": case "leq": return '\u2264'; 4744 case "ge": case "GreaterEqual": case "geq": return '\u2265'; 4745 case "lE": case "LessFullEqual": case "leqq": return '\u2266'; 4746 case "gE": case "GreaterFullEqual": case "geqq": return '\u2267'; 4747 case "lnE": case "lneqq": return '\u2268'; 4748 case "gnE": case "gneqq": return '\u2269'; 4749 case "Lt": case "NestedLessLess": case "ll": return '\u226A'; 4750 case "Gt": case "NestedGreaterGreater": case "gg": return '\u226B'; 4751 case "twixt": case "between": return '\u226C'; 4752 case "NotCupCap": return '\u226D'; 4753 case "nlt": case "NotLess": case "nless": return '\u226E'; 4754 case "ngt": case "NotGreater": case "ngtr": return '\u226F'; 4755 case "nle": case "NotLessEqual": case "nleq": return '\u2270'; 4756 case "nge": case "NotGreaterEqual": case "ngeq": return '\u2271'; 4757 case "lsim": case "LessTilde": case "lesssim": return '\u2272'; 4758 case "gsim": case "gtrsim": case "GreaterTilde": return '\u2273'; 4759 
case "nlsim": case "NotLessTilde": return '\u2274'; 4760 case "ngsim": case "NotGreaterTilde": return '\u2275'; 4761 case "lg": case "lessgtr": case "LessGreater": return '\u2276'; 4762 case "gl": case "gtrless": case "GreaterLess": return '\u2277'; 4763 case "ntlg": case "NotLessGreater": return '\u2278'; 4764 case "ntgl": case "NotGreaterLess": return '\u2279'; 4765 case "pr": case "Precedes": case "prec": return '\u227A'; 4766 case "sc": case "Succeeds": case "succ": return '\u227B'; 4767 case "prcue": case "PrecedesSlantEqual": case "preccurlyeq": return '\u227C'; 4768 case "sccue": case "SucceedsSlantEqual": case "succcurlyeq": return '\u227D'; 4769 case "prsim": case "precsim": case "PrecedesTilde": return '\u227E'; 4770 case "scsim": case "succsim": case "SucceedsTilde": return '\u227F'; 4771 case "npr": case "nprec": case "NotPrecedes": return '\u2280'; 4772 case "nsc": case "nsucc": case "NotSucceeds": return '\u2281'; 4773 case "sub": case "subset": return '\u2282'; 4774 case "sup": case "supset": case "Superset": return '\u2283'; 4775 case "nsub": return '\u2284'; 4776 case "nsup": return '\u2285'; 4777 case "sube": case "SubsetEqual": case "subseteq": return '\u2286'; 4778 case "supe": case "supseteq": case "SupersetEqual": return '\u2287'; 4779 case "nsube": case "nsubseteq": case "NotSubsetEqual": return '\u2288'; 4780 case "nsupe": case "nsupseteq": case "NotSupersetEqual": return '\u2289'; 4781 case "subne": case "subsetneq": return '\u228A'; 4782 case "supne": case "supsetneq": return '\u228B'; 4783 case "cupdot": return '\u228D'; 4784 case "uplus": case "UnionPlus": return '\u228E'; 4785 case "sqsub": case "SquareSubset": case "sqsubset": return '\u228F'; 4786 case "sqsup": case "SquareSuperset": case "sqsupset": return '\u2290'; 4787 case "sqsube": case "SquareSubsetEqual": case "sqsubseteq": return '\u2291'; 4788 case "sqsupe": case "SquareSupersetEqual": case "sqsupseteq": return '\u2292'; 4789 case "sqcap": case "SquareIntersection": return 
'\u2293'; 4790 case "sqcup": case "SquareUnion": return '\u2294'; 4791 case "oplus": case "CirclePlus": return '\u2295'; 4792 case "ominus": case "CircleMinus": return '\u2296'; 4793 case "otimes": case "CircleTimes": return '\u2297'; 4794 case "osol": return '\u2298'; 4795 case "odot": case "CircleDot": return '\u2299'; 4796 case "ocir": case "circledcirc": return '\u229A'; 4797 case "oast": case "circledast": return '\u229B'; 4798 case "odash": case "circleddash": return '\u229D'; 4799 case "plusb": case "boxplus": return '\u229E'; 4800 case "minusb": case "boxminus": return '\u229F'; 4801 case "timesb": case "boxtimes": return '\u22A0'; 4802 case "sdotb": case "dotsquare": return '\u22A1'; 4803 case "vdash": case "RightTee": return '\u22A2'; 4804 case "dashv": case "LeftTee": return '\u22A3'; 4805 case "top": case "DownTee": return '\u22A4'; 4806 case "bottom": case "bot": case "perp": case "UpTee": return '\u22A5'; 4807 case "models": return '\u22A7'; 4808 case "vDash": case "DoubleRightTee": return '\u22A8'; 4809 case "Vdash": return '\u22A9'; 4810 case "Vvdash": return '\u22AA'; 4811 case "VDash": return '\u22AB'; 4812 case "nvdash": return '\u22AC'; 4813 case "nvDash": return '\u22AD'; 4814 case "nVdash": return '\u22AE'; 4815 case "nVDash": return '\u22AF'; 4816 case "prurel": return '\u22B0'; 4817 case "vltri": case "vartriangleleft": case "LeftTriangle": return '\u22B2'; 4818 case "vrtri": case "vartriangleright": case "RightTriangle": return '\u22B3'; 4819 case "ltrie": case "trianglelefteq": case "LeftTriangleEqual": return '\u22B4'; 4820 case "rtrie": case "trianglerighteq": case "RightTriangleEqual": return '\u22B5'; 4821 case "origof": return '\u22B6'; 4822 case "imof": return '\u22B7'; 4823 case "mumap": case "multimap": return '\u22B8'; 4824 case "hercon": return '\u22B9'; 4825 case "intcal": case "intercal": return '\u22BA'; 4826 case "veebar": return '\u22BB'; 4827 case "barvee": return '\u22BD'; 4828 case "angrtvb": return '\u22BE'; 4829 case 
"lrtri": return '\u22BF'; 4830 case "xwedge": case "Wedge": case "bigwedge": return '\u22C0'; 4831 case "xvee": case "Vee": case "bigvee": return '\u22C1'; 4832 case "xcap": case "Intersection": case "bigcap": return '\u22C2'; 4833 case "xcup": case "Union": case "bigcup": return '\u22C3'; 4834 case "diam": case "diamond": case "Diamond": return '\u22C4'; 4835 case "sdot": return '\u22C5'; 4836 case "sstarf": case "Star": return '\u22C6'; 4837 case "divonx": case "divideontimes": return '\u22C7'; 4838 case "bowtie": return '\u22C8'; 4839 case "ltimes": return '\u22C9'; 4840 case "rtimes": return '\u22CA'; 4841 case "lthree": case "leftthreetimes": return '\u22CB'; 4842 case "rthree": case "rightthreetimes": return '\u22CC'; 4843 case "bsime": case "backsimeq": return '\u22CD'; 4844 case "cuvee": case "curlyvee": return '\u22CE'; 4845 case "cuwed": case "curlywedge": return '\u22CF'; 4846 case "Sub": case "Subset": return '\u22D0'; 4847 case "Sup": case "Supset": return '\u22D1'; 4848 case "Cap": return '\u22D2'; 4849 case "Cup": return '\u22D3'; 4850 case "fork": case "pitchfork": return '\u22D4'; 4851 case "epar": return '\u22D5'; 4852 case "ltdot": case "lessdot": return '\u22D6'; 4853 case "gtdot": case "gtrdot": return '\u22D7'; 4854 case "Ll": return '\u22D8'; 4855 case "Gg": case "ggg": return '\u22D9'; 4856 case "leg": case "LessEqualGreater": case "lesseqgtr": return '\u22DA'; 4857 case "gel": case "gtreqless": case "GreaterEqualLess": return '\u22DB'; 4858 case "cuepr": case "curlyeqprec": return '\u22DE'; 4859 case "cuesc": case "curlyeqsucc": return '\u22DF'; 4860 case "nprcue": case "NotPrecedesSlantEqual": return '\u22E0'; 4861 case "nsccue": case "NotSucceedsSlantEqual": return '\u22E1'; 4862 case "nsqsube": case "NotSquareSubsetEqual": return '\u22E2'; 4863 case "nsqsupe": case "NotSquareSupersetEqual": return '\u22E3'; 4864 case "lnsim": return '\u22E6'; 4865 case "gnsim": return '\u22E7'; 4866 case "prnsim": case "precnsim": return '\u22E8'; 4867 
case "scnsim": case "succnsim": return '\u22E9'; 4868 case "nltri": case "ntriangleleft": case "NotLeftTriangle": return '\u22EA'; 4869 case "nrtri": case "ntriangleright": case "NotRightTriangle": return '\u22EB'; 4870 case "nltrie": case "ntrianglelefteq": case "NotLeftTriangleEqual": return '\u22EC'; 4871 case "nrtrie": case "ntrianglerighteq": case "NotRightTriangleEqual": return '\u22ED'; 4872 case "vellip": return '\u22EE'; 4873 case "ctdot": return '\u22EF'; 4874 case "utdot": return '\u22F0'; 4875 case "dtdot": return '\u22F1'; 4876 case "disin": return '\u22F2'; 4877 case "isinsv": return '\u22F3'; 4878 case "isins": return '\u22F4'; 4879 case "isindot": return '\u22F5'; 4880 case "notinvc": return '\u22F6'; 4881 case "notinvb": return '\u22F7'; 4882 case "isinE": return '\u22F9'; 4883 case "nisd": return '\u22FA'; 4884 case "xnis": return '\u22FB'; 4885 case "nis": return '\u22FC'; 4886 case "notnivc": return '\u22FD'; 4887 case "notnivb": return '\u22FE'; 4888 case "barwed": case "barwedge": return '\u2305'; 4889 case "Barwed": case "doublebarwedge": return '\u2306'; 4890 case "lceil": case "LeftCeiling": return '\u2308'; 4891 case "rceil": case "RightCeiling": return '\u2309'; 4892 case "lfloor": case "LeftFloor": return '\u230A'; 4893 case "rfloor": case "RightFloor": return '\u230B'; 4894 case "drcrop": return '\u230C'; 4895 case "dlcrop": return '\u230D'; 4896 case "urcrop": return '\u230E'; 4897 case "ulcrop": return '\u230F'; 4898 case "bnot": return '\u2310'; 4899 case "profline": return '\u2312'; 4900 case "profsurf": return '\u2313'; 4901 case "telrec": return '\u2315'; 4902 case "target": return '\u2316'; 4903 case "ulcorn": case "ulcorner": return '\u231C'; 4904 case "urcorn": case "urcorner": return '\u231D'; 4905 case "dlcorn": case "llcorner": return '\u231E'; 4906 case "drcorn": case "lrcorner": return '\u231F'; 4907 case "frown": case "sfrown": return '\u2322'; 4908 case "smile": case "ssmile": return '\u2323'; 4909 case "cylcty": return 
'\u232D'; 4910 case "profalar": return '\u232E'; 4911 case "topbot": return '\u2336'; 4912 case "ovbar": return '\u233D'; 4913 case "solbar": return '\u233F'; 4914 case "angzarr": return '\u237C'; 4915 case "lmoust": case "lmoustache": return '\u23B0'; 4916 case "rmoust": case "rmoustache": return '\u23B1'; 4917 case "tbrk": case "OverBracket": return '\u23B4'; 4918 case "bbrk": case "UnderBracket": return '\u23B5'; 4919 case "bbrktbrk": return '\u23B6'; 4920 case "OverParenthesis": return '\u23DC'; 4921 case "UnderParenthesis": return '\u23DD'; 4922 case "OverBrace": return '\u23DE'; 4923 case "UnderBrace": return '\u23DF'; 4924 case "trpezium": return '\u23E2'; 4925 case "elinters": return '\u23E7'; 4926 case "blank": return '\u2423'; 4927 case "oS": case "circledS": return '\u24C8'; 4928 case "boxh": case "HorizontalLine": return '\u2500'; 4929 case "boxv": return '\u2502'; 4930 case "boxdr": return '\u250C'; 4931 case "boxdl": return '\u2510'; 4932 case "boxur": return '\u2514'; 4933 case "boxul": return '\u2518'; 4934 case "boxvr": return '\u251C'; 4935 case "boxvl": return '\u2524'; 4936 case "boxhd": return '\u252C'; 4937 case "boxhu": return '\u2534'; 4938 case "boxvh": return '\u253C'; 4939 case "boxH": return '\u2550'; 4940 case "boxV": return '\u2551'; 4941 case "boxdR": return '\u2552'; 4942 case "boxDr": return '\u2553'; 4943 case "boxDR": return '\u2554'; 4944 case "boxdL": return '\u2555'; 4945 case "boxDl": return '\u2556'; 4946 case "boxDL": return '\u2557'; 4947 case "boxuR": return '\u2558'; 4948 case "boxUr": return '\u2559'; 4949 case "boxUR": return '\u255A'; 4950 case "boxuL": return '\u255B'; 4951 case "boxUl": return '\u255C'; 4952 case "boxUL": return '\u255D'; 4953 case "boxvR": return '\u255E'; 4954 case "boxVr": return '\u255F'; 4955 case "boxVR": return '\u2560'; 4956 case "boxvL": return '\u2561'; 4957 case "boxVl": return '\u2562'; 4958 case "boxVL": return '\u2563'; 4959 case "boxHd": return '\u2564'; 4960 case "boxhD": return 
'\u2565'; 4961 case "boxHD": return '\u2566'; 4962 case "boxHu": return '\u2567'; 4963 case "boxhU": return '\u2568'; 4964 case "boxHU": return '\u2569'; 4965 case "boxvH": return '\u256A'; 4966 case "boxVh": return '\u256B'; 4967 case "boxVH": return '\u256C'; 4968 case "uhblk": return '\u2580'; 4969 case "lhblk": return '\u2584'; 4970 case "block": return '\u2588'; 4971 case "blk14": return '\u2591'; 4972 case "blk12": return '\u2592'; 4973 case "blk34": return '\u2593'; 4974 case "squ": case "square": case "Square": return '\u25A1'; 4975 case "squf": case "squarf": case "blacksquare": case "FilledVerySmallSquare": return '\u25AA'; 4976 case "EmptyVerySmallSquare": return '\u25AB'; 4977 case "rect": return '\u25AD'; 4978 case "marker": return '\u25AE'; 4979 case "fltns": return '\u25B1'; 4980 case "xutri": case "bigtriangleup": return '\u25B3'; 4981 case "utrif": case "blacktriangle": return '\u25B4'; 4982 case "utri": case "triangle": return '\u25B5'; 4983 case "rtrif": case "blacktriangleright": return '\u25B8'; 4984 case "rtri": case "triangleright": return '\u25B9'; 4985 case "xdtri": case "bigtriangledown": return '\u25BD'; 4986 case "dtrif": case "blacktriangledown": return '\u25BE'; 4987 case "dtri": case "triangledown": return '\u25BF'; 4988 case "ltrif": case "blacktriangleleft": return '\u25C2'; 4989 case "ltri": case "triangleleft": return '\u25C3'; 4990 case "loz": case "lozenge": return '\u25CA'; 4991 case "cir": return '\u25CB'; 4992 case "tridot": return '\u25EC'; 4993 case "xcirc": case "bigcirc": return '\u25EF'; 4994 case "ultri": return '\u25F8'; 4995 case "urtri": return '\u25F9'; 4996 case "lltri": return '\u25FA'; 4997 case "EmptySmallSquare": return '\u25FB'; 4998 case "FilledSmallSquare": return '\u25FC'; 4999 case "starf": case "bigstar": return '\u2605'; 5000 case "star": return '\u2606'; 5001 case "phone": return '\u260E'; 5002 case "female": return '\u2640'; 5003 case "male": return '\u2642'; 5004 case "spades": case "spadesuit": 
return '\u2660'; 5005 case "clubs": case "clubsuit": return '\u2663'; 5006 case "hearts": case "heartsuit": return '\u2665'; 5007 case "diams": case "diamondsuit": return '\u2666'; 5008 case "sung": return '\u266A'; 5009 case "flat": return '\u266D'; 5010 case "natur": case "natural": return '\u266E'; 5011 case "sharp": return '\u266F'; 5012 case "check": case "checkmark": return '\u2713'; 5013 case "cross": return '\u2717'; 5014 case "malt": case "maltese": return '\u2720'; 5015 case "sext": return '\u2736'; 5016 case "VerticalSeparator": return '\u2758'; 5017 case "lbbrk": return '\u2772'; 5018 case "rbbrk": return '\u2773'; 5019 case "bsolhsub": return '\u27C8'; 5020 case "suphsol": return '\u27C9'; 5021 case "lobrk": case "LeftDoubleBracket": return '\u27E6'; 5022 case "robrk": case "RightDoubleBracket": return '\u27E7'; 5023 case "lang": case "LeftAngleBracket": case "langle": return '\u27E8'; 5024 case "rang": case "RightAngleBracket": case "rangle": return '\u27E9'; 5025 case "Lang": return '\u27EA'; 5026 case "Rang": return '\u27EB'; 5027 case "loang": return '\u27EC'; 5028 case "roang": return '\u27ED'; 5029 case "xlarr": case "longleftarrow": case "LongLeftArrow": return '\u27F5'; 5030 case "xrarr": case "longrightarrow": case "LongRightArrow": return '\u27F6'; 5031 case "xharr": case "longleftrightarrow": case "LongLeftRightArrow": return '\u27F7'; 5032 case "xlArr": case "Longleftarrow": case "DoubleLongLeftArrow": return '\u27F8'; 5033 case "xrArr": case "Longrightarrow": case "DoubleLongRightArrow": return '\u27F9'; 5034 case "xhArr": case "Longleftrightarrow": case "DoubleLongLeftRightArrow": return '\u27FA'; 5035 case "xmap": case "longmapsto": return '\u27FC'; 5036 case "dzigrarr": return '\u27FF'; 5037 case "nvlArr": return '\u2902'; 5038 case "nvrArr": return '\u2903'; 5039 case "nvHarr": return '\u2904'; 5040 case "Map": return '\u2905'; 5041 case "lbarr": return '\u290C'; 5042 case "rbarr": case "bkarow": return '\u290D'; 5043 case "lBarr": 
return '\u290E'; 5044 case "rBarr": case "dbkarow": return '\u290F'; 5045 case "RBarr": case "drbkarow": return '\u2910'; 5046 case "DDotrahd": return '\u2911'; 5047 case "UpArrowBar": return '\u2912'; 5048 case "DownArrowBar": return '\u2913'; 5049 case "Rarrtl": return '\u2916'; 5050 case "latail": return '\u2919'; 5051 case "ratail": return '\u291A'; 5052 case "lAtail": return '\u291B'; 5053 case "rAtail": return '\u291C'; 5054 case "larrfs": return '\u291D'; 5055 case "rarrfs": return '\u291E'; 5056 case "larrbfs": return '\u291F'; 5057 case "rarrbfs": return '\u2920'; 5058 case "nwarhk": return '\u2923'; 5059 case "nearhk": return '\u2924'; 5060 case "searhk": case "hksearow": return '\u2925'; 5061 case "swarhk": case "hkswarow": return '\u2926'; 5062 case "nwnear": return '\u2927'; 5063 case "nesear": case "toea": return '\u2928'; 5064 case "seswar": case "tosa": return '\u2929'; 5065 case "swnwar": return '\u292A'; 5066 case "rarrc": return '\u2933'; 5067 case "cudarrr": return '\u2935'; 5068 case "ldca": return '\u2936'; 5069 case "rdca": return '\u2937'; 5070 case "cudarrl": return '\u2938'; 5071 case "larrpl": return '\u2939'; 5072 case "curarrm": return '\u293C'; 5073 case "cularrp": return '\u293D'; 5074 case "rarrpl": return '\u2945'; 5075 case "harrcir": return '\u2948'; 5076 case "Uarrocir": return '\u2949'; 5077 case "lurdshar": return '\u294A'; 5078 case "ldrushar": return '\u294B'; 5079 case "LeftRightVector": return '\u294E'; 5080 case "RightUpDownVector": return '\u294F'; 5081 case "DownLeftRightVector": return '\u2950'; 5082 case "LeftUpDownVector": return '\u2951'; 5083 case "LeftVectorBar": return '\u2952'; 5084 case "RightVectorBar": return '\u2953'; 5085 case "RightUpVectorBar": return '\u2954'; 5086 case "RightDownVectorBar": return '\u2955'; 5087 case "DownLeftVectorBar": return '\u2956'; 5088 case "DownRightVectorBar": return '\u2957'; 5089 case "LeftUpVectorBar": return '\u2958'; 5090 case "LeftDownVectorBar": return '\u2959'; 5091 case 
"LeftTeeVector": return '\u295A'; 5092 case "RightTeeVector": return '\u295B'; 5093 case "RightUpTeeVector": return '\u295C'; 5094 case "RightDownTeeVector": return '\u295D'; 5095 case "DownLeftTeeVector": return '\u295E'; 5096 case "DownRightTeeVector": return '\u295F'; 5097 case "LeftUpTeeVector": return '\u2960'; 5098 case "LeftDownTeeVector": return '\u2961'; 5099 case "lHar": return '\u2962'; 5100 case "uHar": return '\u2963'; 5101 case "rHar": return '\u2964'; 5102 case "dHar": return '\u2965'; 5103 case "luruhar": return '\u2966'; 5104 case "ldrdhar": return '\u2967'; 5105 case "ruluhar": return '\u2968'; 5106 case "rdldhar": return '\u2969'; 5107 case "lharul": return '\u296A'; 5108 case "llhard": return '\u296B'; 5109 case "rharul": return '\u296C'; 5110 case "lrhard": return '\u296D'; 5111 case "udhar": case "UpEquilibrium": return '\u296E'; 5112 case "duhar": case "ReverseUpEquilibrium": return '\u296F'; 5113 case "RoundImplies": return '\u2970'; 5114 case "erarr": return '\u2971'; 5115 case "simrarr": return '\u2972'; 5116 case "larrsim": return '\u2973'; 5117 case "rarrsim": return '\u2974'; 5118 case "rarrap": return '\u2975'; 5119 case "ltlarr": return '\u2976'; 5120 case "gtrarr": return '\u2978'; 5121 case "subrarr": return '\u2979'; 5122 case "suplarr": return '\u297B'; 5123 case "lfisht": return '\u297C'; 5124 case "rfisht": return '\u297D'; 5125 case "ufisht": return '\u297E'; 5126 case "dfisht": return '\u297F'; 5127 case "lopar": return '\u2985'; 5128 case "ropar": return '\u2986'; 5129 case "lbrke": return '\u298B'; 5130 case "rbrke": return '\u298C'; 5131 case "lbrkslu": return '\u298D'; 5132 case "rbrksld": return '\u298E'; 5133 case "lbrksld": return '\u298F'; 5134 case "rbrkslu": return '\u2990'; 5135 case "langd": return '\u2991'; 5136 case "rangd": return '\u2992'; 5137 case "lparlt": return '\u2993'; 5138 case "rpargt": return '\u2994'; 5139 case "gtlPar": return '\u2995'; 5140 case "ltrPar": return '\u2996'; 5141 case "vzigzag": 
return '\u299A'; 5142 case "vangrt": return '\u299C'; 5143 case "angrtvbd": return '\u299D'; 5144 case "ange": return '\u29A4'; 5145 case "range": return '\u29A5'; 5146 case "dwangle": return '\u29A6'; 5147 case "uwangle": return '\u29A7'; 5148 case "angmsdaa": return '\u29A8'; 5149 case "angmsdab": return '\u29A9'; 5150 case "angmsdac": return '\u29AA'; 5151 case "angmsdad": return '\u29AB'; 5152 case "angmsdae": return '\u29AC'; 5153 case "angmsdaf": return '\u29AD'; 5154 case "angmsdag": return '\u29AE'; 5155 case "angmsdah": return '\u29AF'; 5156 case "bemptyv": return '\u29B0'; 5157 case "demptyv": return '\u29B1'; 5158 case "cemptyv": return '\u29B2'; 5159 case "raemptyv": return '\u29B3'; 5160 case "laemptyv": return '\u29B4'; 5161 case "ohbar": return '\u29B5'; 5162 case "omid": return '\u29B6'; 5163 case "opar": return '\u29B7'; 5164 case "operp": return '\u29B9'; 5165 case "olcross": return '\u29BB'; 5166 case "odsold": return '\u29BC'; 5167 case "olcir": return '\u29BE'; 5168 case "ofcir": return '\u29BF'; 5169 case "olt": return '\u29C0'; 5170 case "ogt": return '\u29C1'; 5171 case "cirscir": return '\u29C2'; 5172 case "cirE": return '\u29C3'; 5173 case "solb": return '\u29C4'; 5174 case "bsolb": return '\u29C5'; 5175 case "boxbox": return '\u29C9'; 5176 case "trisb": return '\u29CD'; 5177 case "rtriltri": return '\u29CE'; 5178 case "LeftTriangleBar": return '\u29CF'; 5179 case "RightTriangleBar": return '\u29D0'; 5180 case "iinfin": return '\u29DC'; 5181 case "infintie": return '\u29DD'; 5182 case "nvinfin": return '\u29DE'; 5183 case "eparsl": return '\u29E3'; 5184 case "smeparsl": return '\u29E4'; 5185 case "eqvparsl": return '\u29E5'; 5186 case "lozf": case "blacklozenge": return '\u29EB'; 5187 case "RuleDelayed": return '\u29F4'; 5188 case "dsol": return '\u29F6'; 5189 case "xodot": case "bigodot": return '\u2A00'; 5190 case "xoplus": case "bigoplus": return '\u2A01'; 5191 case "xotime": case "bigotimes": return '\u2A02'; 5192 case "xuplus": case 
"biguplus": return '\u2A04'; 5193 case "xsqcup": case "bigsqcup": return '\u2A06'; 5194 case "qint": case "iiiint": return '\u2A0C'; 5195 case "fpartint": return '\u2A0D'; 5196 case "cirfnint": return '\u2A10'; 5197 case "awint": return '\u2A11'; 5198 case "rppolint": return '\u2A12'; 5199 case "scpolint": return '\u2A13'; 5200 case "npolint": return '\u2A14'; 5201 case "pointint": return '\u2A15'; 5202 case "quatint": return '\u2A16'; 5203 case "intlarhk": return '\u2A17'; 5204 case "pluscir": return '\u2A22'; 5205 case "plusacir": return '\u2A23'; 5206 case "simplus": return '\u2A24'; 5207 case "plusdu": return '\u2A25'; 5208 case "plussim": return '\u2A26'; 5209 case "plustwo": return '\u2A27'; 5210 case "mcomma": return '\u2A29'; 5211 case "minusdu": return '\u2A2A'; 5212 case "loplus": return '\u2A2D'; 5213 case "roplus": return '\u2A2E'; 5214 case "Cross": return '\u2A2F'; 5215 case "timesd": return '\u2A30'; 5216 case "timesbar": return '\u2A31'; 5217 case "smashp": return '\u2A33'; 5218 case "lotimes": return '\u2A34'; 5219 case "rotimes": return '\u2A35'; 5220 case "otimesas": return '\u2A36'; 5221 case "Otimes": return '\u2A37'; 5222 case "odiv": return '\u2A38'; 5223 case "triplus": return '\u2A39'; 5224 case "triminus": return '\u2A3A'; 5225 case "tritime": return '\u2A3B'; 5226 case "iprod": case "intprod": return '\u2A3C'; 5227 case "amalg": return '\u2A3F'; 5228 case "capdot": return '\u2A40'; 5229 case "ncup": return '\u2A42'; 5230 case "ncap": return '\u2A43'; 5231 case "capand": return '\u2A44'; 5232 case "cupor": return '\u2A45'; 5233 case "cupcap": return '\u2A46'; 5234 case "capcup": return '\u2A47'; 5235 case "cupbrcap": return '\u2A48'; 5236 case "capbrcup": return '\u2A49'; 5237 case "cupcup": return '\u2A4A'; 5238 case "capcap": return '\u2A4B'; 5239 case "ccups": return '\u2A4C'; 5240 case "ccaps": return '\u2A4D'; 5241 case "ccupssm": return '\u2A50'; 5242 case "And": return '\u2A53'; 5243 case "Or": return '\u2A54'; 5244 case "andand": 
return '\u2A55'; 5245 case "oror": return '\u2A56'; 5246 case "orslope": return '\u2A57'; 5247 case "andslope": return '\u2A58'; 5248 case "andv": return '\u2A5A'; 5249 case "orv": return '\u2A5B'; 5250 case "andd": return '\u2A5C'; 5251 case "ord": return '\u2A5D'; 5252 case "wedbar": return '\u2A5F'; 5253 case "sdote": return '\u2A66'; 5254 case "simdot": return '\u2A6A'; 5255 case "congdot": return '\u2A6D'; 5256 case "easter": return '\u2A6E'; 5257 case "apacir": return '\u2A6F'; 5258 case "apE": return '\u2A70'; 5259 case "eplus": return '\u2A71'; 5260 case "pluse": return '\u2A72'; 5261 case "Esim": return '\u2A73'; 5262 case "Colone": return '\u2A74'; 5263 case "Equal": return '\u2A75'; 5264 case "eDDot": case "ddotseq": return '\u2A77'; 5265 case "equivDD": return '\u2A78'; 5266 case "ltcir": return '\u2A79'; 5267 case "gtcir": return '\u2A7A'; 5268 case "ltquest": return '\u2A7B'; 5269 case "gtquest": return '\u2A7C'; 5270 case "les": case "LessSlantEqual": case "leqslant": return '\u2A7D'; 5271 case "ges": case "GreaterSlantEqual": case "geqslant": return '\u2A7E'; 5272 case "lesdot": return '\u2A7F'; 5273 case "gesdot": return '\u2A80'; 5274 case "lesdoto": return '\u2A81'; 5275 case "gesdoto": return '\u2A82'; 5276 case "lesdotor": return '\u2A83'; 5277 case "gesdotol": return '\u2A84'; 5278 case "lap": case "lessapprox": return '\u2A85'; 5279 case "gap": case "gtrapprox": return '\u2A86'; 5280 case "lne": case "lneq": return '\u2A87'; 5281 case "gne": case "gneq": return '\u2A88'; 5282 case "lnap": case "lnapprox": return '\u2A89'; 5283 case "gnap": case "gnapprox": return '\u2A8A'; 5284 case "lEg": case "lesseqqgtr": return '\u2A8B'; 5285 case "gEl": case "gtreqqless": return '\u2A8C'; 5286 case "lsime": return '\u2A8D'; 5287 case "gsime": return '\u2A8E'; 5288 case "lsimg": return '\u2A8F'; 5289 case "gsiml": return '\u2A90'; 5290 case "lgE": return '\u2A91'; 5291 case "glE": return '\u2A92'; 5292 case "lesges": return '\u2A93'; 5293 case "gesles": 
return '\u2A94'; 5294 case "els": case "eqslantless": return '\u2A95'; 5295 case "egs": case "eqslantgtr": return '\u2A96'; 5296 case "elsdot": return '\u2A97'; 5297 case "egsdot": return '\u2A98'; 5298 case "el": return '\u2A99'; 5299 case "eg": return '\u2A9A'; 5300 case "siml": return '\u2A9D'; 5301 case "simg": return '\u2A9E'; 5302 case "simlE": return '\u2A9F'; 5303 case "simgE": return '\u2AA0'; 5304 case "LessLess": return '\u2AA1'; 5305 case "GreaterGreater": return '\u2AA2'; 5306 case "glj": return '\u2AA4'; 5307 case "gla": return '\u2AA5'; 5308 case "ltcc": return '\u2AA6'; 5309 case "gtcc": return '\u2AA7'; 5310 case "lescc": return '\u2AA8'; 5311 case "gescc": return '\u2AA9'; 5312 case "smt": return '\u2AAA'; 5313 case "lat": return '\u2AAB'; 5314 case "smte": return '\u2AAC'; 5315 case "late": return '\u2AAD'; 5316 case "bumpE": return '\u2AAE'; 5317 case "pre": case "preceq": case "PrecedesEqual": return '\u2AAF'; 5318 case "sce": case "succeq": case "SucceedsEqual": return '\u2AB0'; 5319 case "prE": return '\u2AB3'; 5320 case "scE": return '\u2AB4'; 5321 case "prnE": case "precneqq": return '\u2AB5'; 5322 case "scnE": case "succneqq": return '\u2AB6'; 5323 case "prap": case "precapprox": return '\u2AB7'; 5324 case "scap": case "succapprox": return '\u2AB8'; 5325 case "prnap": case "precnapprox": return '\u2AB9'; 5326 case "scnap": case "succnapprox": return '\u2ABA'; 5327 case "Pr": return '\u2ABB'; 5328 case "Sc": return '\u2ABC'; 5329 case "subdot": return '\u2ABD'; 5330 case "supdot": return '\u2ABE'; 5331 case "subplus": return '\u2ABF'; 5332 case "supplus": return '\u2AC0'; 5333 case "submult": return '\u2AC1'; 5334 case "supmult": return '\u2AC2'; 5335 case "subedot": return '\u2AC3'; 5336 case "supedot": return '\u2AC4'; 5337 case "subE": case "subseteqq": return '\u2AC5'; 5338 case "supE": case "supseteqq": return '\u2AC6'; 5339 case "subsim": return '\u2AC7'; 5340 case "supsim": return '\u2AC8'; 5341 case "subnE": case "subsetneqq": 
return '\u2ACB'; 5342 case "supnE": case "supsetneqq": return '\u2ACC'; 5343 case "csub": return '\u2ACF'; 5344 case "csup": return '\u2AD0'; 5345 case "csube": return '\u2AD1'; 5346 case "csupe": return '\u2AD2'; 5347 case "subsup": return '\u2AD3'; 5348 case "supsub": return '\u2AD4'; 5349 case "subsub": return '\u2AD5'; 5350 case "supsup": return '\u2AD6'; 5351 case "suphsub": return '\u2AD7'; 5352 case "supdsub": return '\u2AD8'; 5353 case "forkv": return '\u2AD9'; 5354 case "topfork": return '\u2ADA'; 5355 case "mlcp": return '\u2ADB'; 5356 case "Dashv": case "DoubleLeftTee": return '\u2AE4'; 5357 case "Vdashl": return '\u2AE6'; 5358 case "Barv": return '\u2AE7'; 5359 case "vBar": return '\u2AE8'; 5360 case "vBarv": return '\u2AE9'; 5361 case "Vbar": return '\u2AEB'; 5362 case "Not": return '\u2AEC'; 5363 case "bNot": return '\u2AED'; 5364 case "rnmid": return '\u2AEE'; 5365 case "cirmid": return '\u2AEF'; 5366 case "midcir": return '\u2AF0'; 5367 case "topcir": return '\u2AF1'; 5368 case "nhpar": return '\u2AF2'; 5369 case "parsim": return '\u2AF3'; 5370 case "parsl": return '\u2AFD'; 5371 case "fflig": return '\uFB00'; 5372 case "filig": return '\uFB01'; 5373 case "fllig": return '\uFB02'; 5374 case "ffilig": return '\uFB03'; 5375 case "ffllig": return '\uFB04'; 5376 case "Ascr": return '\U0001D49C'; 5377 case "Cscr": return '\U0001D49E'; 5378 case "Dscr": return '\U0001D49F'; 5379 case "Gscr": return '\U0001D4A2'; 5380 case "Jscr": return '\U0001D4A5'; 5381 case "Kscr": return '\U0001D4A6'; 5382 case "Nscr": return '\U0001D4A9'; 5383 case "Oscr": return '\U0001D4AA'; 5384 case "Pscr": return '\U0001D4AB'; 5385 case "Qscr": return '\U0001D4AC'; 5386 case "Sscr": return '\U0001D4AE'; 5387 case "Tscr": return '\U0001D4AF'; 5388 case "Uscr": return '\U0001D4B0'; 5389 case "Vscr": return '\U0001D4B1'; 5390 case "Wscr": return '\U0001D4B2'; 5391 case "Xscr": return '\U0001D4B3'; 5392 case "Yscr": return '\U0001D4B4'; 5393 case "Zscr": return '\U0001D4B5'; 5394 
case "ascr": return '\U0001D4B6'; 5395 case "bscr": return '\U0001D4B7'; 5396 case "cscr": return '\U0001D4B8'; 5397 case "dscr": return '\U0001D4B9'; 5398 case "fscr": return '\U0001D4BB'; 5399 case "hscr": return '\U0001D4BD'; 5400 case "iscr": return '\U0001D4BE'; 5401 case "jscr": return '\U0001D4BF'; 5402 case "kscr": return '\U0001D4C0'; 5403 case "lscr": return '\U0001D4C1'; 5404 case "mscr": return '\U0001D4C2'; 5405 case "nscr": return '\U0001D4C3'; 5406 case "pscr": return '\U0001D4C5'; 5407 case "qscr": return '\U0001D4C6'; 5408 case "rscr": return '\U0001D4C7'; 5409 case "sscr": return '\U0001D4C8'; 5410 case "tscr": return '\U0001D4C9'; 5411 case "uscr": return '\U0001D4CA'; 5412 case "vscr": return '\U0001D4CB'; 5413 case "wscr": return '\U0001D4CC'; 5414 case "xscr": return '\U0001D4CD'; 5415 case "yscr": return '\U0001D4CE'; 5416 case "zscr": return '\U0001D4CF'; 5417 case "Afr": return '\U0001D504'; 5418 case "Bfr": return '\U0001D505'; 5419 case "Dfr": return '\U0001D507'; 5420 case "Efr": return '\U0001D508'; 5421 case "Ffr": return '\U0001D509'; 5422 case "Gfr": return '\U0001D50A'; 5423 case "Jfr": return '\U0001D50D'; 5424 case "Kfr": return '\U0001D50E'; 5425 case "Lfr": return '\U0001D50F'; 5426 case "Mfr": return '\U0001D510'; 5427 case "Nfr": return '\U0001D511'; 5428 case "Ofr": return '\U0001D512'; 5429 case "Pfr": return '\U0001D513'; 5430 case "Qfr": return '\U0001D514'; 5431 case "Sfr": return '\U0001D516'; 5432 case "Tfr": return '\U0001D517'; 5433 case "Ufr": return '\U0001D518'; 5434 case "Vfr": return '\U0001D519'; 5435 case "Wfr": return '\U0001D51A'; 5436 case "Xfr": return '\U0001D51B'; 5437 case "Yfr": return '\U0001D51C'; 5438 case "afr": return '\U0001D51E'; 5439 case "bfr": return '\U0001D51F'; 5440 case "cfr": return '\U0001D520'; 5441 case "dfr": return '\U0001D521'; 5442 case "efr": return '\U0001D522'; 5443 case "ffr": return '\U0001D523'; 5444 case "gfr": return '\U0001D524'; 5445 case "hfr": return '\U0001D525'; 5446 
case "ifr": return '\U0001D526'; 5447 case "jfr": return '\U0001D527'; 5448 case "kfr": return '\U0001D528'; 5449 case "lfr": return '\U0001D529'; 5450 case "mfr": return '\U0001D52A'; 5451 case "nfr": return '\U0001D52B'; 5452 case "ofr": return '\U0001D52C'; 5453 case "pfr": return '\U0001D52D'; 5454 case "qfr": return '\U0001D52E'; 5455 case "rfr": return '\U0001D52F'; 5456 case "sfr": return '\U0001D530'; 5457 case "tfr": return '\U0001D531'; 5458 case "ufr": return '\U0001D532'; 5459 case "vfr": return '\U0001D533'; 5460 case "wfr": return '\U0001D534'; 5461 case "xfr": return '\U0001D535'; 5462 case "yfr": return '\U0001D536'; 5463 case "zfr": return '\U0001D537'; 5464 case "Aopf": return '\U0001D538'; 5465 case "Bopf": return '\U0001D539'; 5466 case "Dopf": return '\U0001D53B'; 5467 case "Eopf": return '\U0001D53C'; 5468 case "Fopf": return '\U0001D53D'; 5469 case "Gopf": return '\U0001D53E'; 5470 case "Iopf": return '\U0001D540'; 5471 case "Jopf": return '\U0001D541'; 5472 case "Kopf": return '\U0001D542'; 5473 case "Lopf": return '\U0001D543'; 5474 case "Mopf": return '\U0001D544'; 5475 case "Oopf": return '\U0001D546'; 5476 case "Sopf": return '\U0001D54A'; 5477 case "Topf": return '\U0001D54B'; 5478 case "Uopf": return '\U0001D54C'; 5479 case "Vopf": return '\U0001D54D'; 5480 case "Wopf": return '\U0001D54E'; 5481 case "Xopf": return '\U0001D54F'; 5482 case "Yopf": return '\U0001D550'; 5483 case "aopf": return '\U0001D552'; 5484 case "bopf": return '\U0001D553'; 5485 case "copf": return '\U0001D554'; 5486 case "dopf": return '\U0001D555'; 5487 case "eopf": return '\U0001D556'; 5488 case "fopf": return '\U0001D557'; 5489 case "gopf": return '\U0001D558'; 5490 case "hopf": return '\U0001D559'; 5491 case "iopf": return '\U0001D55A'; 5492 case "jopf": return '\U0001D55B'; 5493 case "kopf": return '\U0001D55C'; 5494 case "lopf": return '\U0001D55D'; 5495 case "mopf": return '\U0001D55E'; 5496 case "nopf": return '\U0001D55F'; 5497 case "oopf": return 
'\U0001D560';
		case "popf": return '\U0001D561';
		case "qopf": return '\U0001D562';
		case "ropf": return '\U0001D563';
		case "sopf": return '\U0001D564';
		case "topf": return '\U0001D565';
		case "uopf": return '\U0001D566';
		case "vopf": return '\U0001D567';
		case "wopf": return '\U0001D568';
		case "xopf": return '\U0001D569';
		case "yopf": return '\U0001D56A';
		case "zopf": return '\U0001D56B';

		// and handling numeric entities
		default:
			if(entity[1] == '#') {
				if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) {
					auto hex = entity[3..$-1];

					auto p = intFromHex(to!string(hex).toLower());
					return cast(dchar) p;
				} else {
					auto decimal = entity[2..$-1];

					// dealing with broken html entities
					while(decimal.length && (decimal[0] < '0' || decimal[0] > '9'))
						decimal = decimal[1 .. $];

					if(decimal.length == 0)
						return ' '; // this is really broken html
					// done with dealing with broken stuff

					auto p = std.conv.to!int(decimal);
					return cast(dchar) p;
				}
			} else
				return '\ufffd'; // replacement character diamond thing
	}

	assert(0);
}

import std.utf;
import std.stdio;

/// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string.
///
/// By default, it uses loose mode - it will try to return a useful string from garbage input too:
/// text that starts like an entity but is never terminated is kept as it was written, and an
/// entity that cannot be turned into a valid character is replaced with U+FFFD.
/// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input.
///
/// Params:
///     data = the raw text, which may contain entities such as `&amp;` or `&#38;`
///     strict = `true` to throw on garbage input instead of recovering from it
///
/// Returns:
///     The decoded text. When `data` contains no `&` at all, the original slice is
///     returned as-is, without copying.
///
/// Throws:
///     `Exception` in strict mode when an entity is unterminated or has another `&` inside it.
///     In strict mode, anything thrown while parsing or encoding an entity also propagates.
/// Group: core_functionality
string htmlEntitiesDecode(string data, bool strict = false) {
	// this check makes a *big* difference; about a 50% improvement of parse speed on my test.
	if(data.indexOf("&") == -1) // all html entities begin with &
		return data; // if there are no entities in here, we can return the original slice and save some time

	// How many characters we look at after the '&' before deciding this isn't an entity.
	// The longest name HTML defines is 31 characters (CounterClockwiseContourIntegral),
	// plus one for the terminating ';'. The previous limit of 9 made every entity with a
	// name longer than 8 characters undecodable, even though parseEntity knows many of them.
	enum maxEntityBodyLength = 32;

	char[] a; // this seems to do a *better* job than appender!

	char[4] buffer;

	bool tryingEntity = false;
	dchar[1 + maxEntityBodyLength] entityBeingTried; // the leading '&' plus the body
	int entityBeingTriedLength = 0;
	int entityAttemptIndex = 0;

	// Decodes the candidate collected so far and appends the resulting character to the output.
	void appendDecodedEntity() {
		dchar decoded;
		if(strict) {
			// strict callers asked for exceptions, so let everything propagate untouched
			decoded = parseEntity(entityBeingTried[0 .. entityBeingTriedLength]);
		} else {
			// loose mode promises a useful result from garbage. A malformed numeric entity
			// (e.g. &#12a;) makes the number conversion throw, and an out of range or
			// surrogate value (e.g. &#xD800;) would make std.utf.encode throw.
			try {
				decoded = parseEntity(entityBeingTried[0 .. entityBeingTriedLength]);
			} catch(Exception e) {
				decoded = '\ufffd';
			}

			if(!std.utf.isValidDchar(decoded))
				decoded = '\ufffd';
		}

		a ~= buffer[0 .. std.utf.encode(buffer, decoded)];
	}

	foreach(dchar ch; data) {
		if(!tryingEntity) {
			if(ch == '&') {
				tryingEntity = true;
				entityBeingTried[0] = ch;
				entityBeingTriedLength = 1;
				entityAttemptIndex = 0;
			} else {
				a ~= buffer[0 .. std.utf.encode(buffer, ch)];
			}
			continue;
		}

		entityAttemptIndex++;
		entityBeingTried[entityBeingTriedLength++] = ch;

		// I saw some crappy html in the wild that looked like &0&#1111; this tries to handle that.
		if(ch == '&') {
			if(strict)
				throw new Exception("unterminated entity; & inside another at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

			// if not strict, let's try to parse both.

			if(entityBeingTried[0 .. entityBeingTriedLength] == "&&")
				a ~= "&"; // double amp means keep the first one, still try to parse the next one
			else
				appendDecodedEntity();

			// tryingEntity is still true, and slot 0 already holds the '&' that starts the next attempt
			entityBeingTriedLength = 1;
			entityAttemptIndex = 0;
		} else if(ch == ';') {
			tryingEntity = false;
			appendDecodedEntity();
		} else if(ch == ' ' || entityAttemptIndex >= maxEntityBodyLength) {
			// a space (e.g. "you & i") or too many characters: this was never an entity
			if(strict)
				throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

			tryingEntity = false;
			a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]);
		}
	}

	if(tryingEntity) {
		if(strict)
			throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

		// otherwise, let's try to recover, at least so we don't drop any data
		a ~= to!string(entityBeingTried[0 .. entityBeingTriedLength]);
		// FIXME: what if we have "cool &amp"? should we try to parse it?
	}

	return cast(string) a; // assumeUnique is actually kinda slow, lol
}

/// Base class for the nodes in this file that are not ordinary tags, such as
/// [RawSource], [HtmlComment] and the instruction nodes. They never take children.
/// Group: implementations
abstract class SpecialElement : Element {
	/// Creates the node for the given document.
	this(Document _parentDocument) {
		super(_parentDocument);
	}

	/// Not supported: this always fails an assertion.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a special node");
	}

	/// Always 100.
	@property override int nodeType() const {
		return 100;
	}
}

/// A node whose `source` string is written to the output exactly as given.
/// Group: implementations
class RawSource : SpecialElement {
	/// Creates a `#raw` node holding `s`.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#raw";
	}

	///.
	override string nodeValue() const {
		return this.toString();
	}

	/// Puts `source` into `where` unmodified and returns it.
	override string writeToAppender(Appender!string where = appender!string()) const {
		where.put(source);
		return source;
	}

	override string toPrettyString(bool, int, string) const {
		return source;
	}


	override RawSource cloneNode(bool deep) {
		return new RawSource(parentDocument, source);
	}

	///.
string source;
}

/// Common base of [PhpCode] and [AspCode]: a node holding a piece of server-side code,
/// which is written back out wrapped in `<` and `>`.
/// Group: implementations
abstract class ServerSideCode : SpecialElement {
	/// Creates the node; its tag name becomes `#` followed by `type`.
	this(Document _parentDocument, string type) {
		super(_parentDocument);
		tagName = "#" ~ type;
	}

	/// The stored source, without the surrounding angle brackets.
	override string nodeValue() const {
		return this.source;
	}

	/// Appends `<`, the source and `>` to `where`, returning the part that was just appended.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto mark = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[mark .. $];
	}

	override string toPrettyString(bool, int, string) const {
		return "<" ~ source ~ ">";
	}

	/// The code held by this node.
	string source;
}

/// A [ServerSideCode] node tagged `#php`.
/// Group: implementations
class PhpCode : ServerSideCode {
	/// Creates the node with `s` as its source.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "php");
		source = s;
	}

	override PhpCode cloneNode(bool deep) {
		return new PhpCode(parentDocument, source);
	}
}

/// A [ServerSideCode] node tagged `#asp`.
/// Group: implementations
class AspCode : ServerSideCode {
	/// Creates the node with `s` as its source.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "asp");
		source = s;
	}

	override AspCode cloneNode(bool deep) {
		return new AspCode(parentDocument, source);
	}
}

/// A node that is written out as `<!`, its source, then `>`. Its tag name is `#bpi`.
/// Group: implementations
class BangInstruction : SpecialElement {
	/// Creates the node with `s` as its source.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#bpi";
	}

	/// The stored source, without the surrounding markup.
	override string nodeValue() const {
		return this.source;
	}

	override BangInstruction cloneNode(bool deep) {
		return new BangInstruction(parentDocument, source);
	}

	/// Appends `<!`, the source and `>` to `where`, returning the part that was just appended.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto mark = where.data.length;
		where.put("<!");
		where.put(source);
		where.put(">");
		return where.data[mark .. $];
	}

	override string toPrettyString(bool, int, string) const {
		return "<!" ~ source ~ ">";
	}

	/// The text between `<!` and `>`.
	string source;
}

/// A node that is written out as `<`, its source, then `>`. Its tag name is `#qpi`.
/// Group: implementations
class QuestionInstruction : SpecialElement {
	/// Creates the node with `s` as its source.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#qpi";
	}

	override QuestionInstruction cloneNode(bool deep) {
		return new QuestionInstruction(parentDocument, source);
	}

	/// The stored source, without the surrounding angle brackets.
	override string nodeValue() const {
		return this.source;
	}

	/// Appends `<`, the source and `>` to `where`, returning the part that was just appended.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto mark = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[mark .. $];
	}

	override string toPrettyString(bool, int, string) const {
		return "<" ~ source ~ ">";
	}


	/// The text between `<` and `>`.
	string source;
}

///.
/// Group: implementations
class HtmlComment : SpecialElement {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#comment";
	}

	override HtmlComment cloneNode(bool deep) {
		return new HtmlComment(parentDocument, source);
	}

	///.
	override string nodeValue() const {
		return this.source;
	}

	///.
override string writeToAppender(Appender!string where = appender!string()) const {
		auto start = where.data.length;
		where.put("<!--");
		where.put(source);
		where.put("-->");
		return where.data[start .. $];
	}

	override string toPrettyString(bool, int, string) const {
		string s;
		s ~= "<!--";
		s ~= source;
		s ~= "-->";
		return s;
	}


	///.
	string source;
}




/// A run of text in the document. The text is kept decoded in `contents`
/// and is entity-encoded again whenever the node is written out.
/// Group: implementations
class TextNode : Element {
  public:
	/// Creates a `#text` node for `_parentDocument` holding the already decoded text `e`.
	this(Document _parentDocument, string e) {
		super(_parentDocument);
		contents = e;
		tagName = "#text";
	}

	/// Creates a text node that has no parent document.
	this(string e) {
		this(null, e);
	}

	string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes

	/// Creates a text node from text that still contains html entities.
	/// The entities are decoded strictly only when a document is given and it is not in loose mode.
	static TextNode fromUndecodedString(Document _parentDocument, string html) {
		auto e = new TextNode(_parentDocument, "");
		e.contents = htmlEntitiesDecode(html, _parentDocument is null ? false : !_parentDocument.loose);
		return e;
	}

	/// Returns a new text node with the same document and contents. `deep` is not used.
	override @property TextNode cloneNode(bool deep) {
		auto n = new TextNode(parentDocument, contents);
		return n;
	}

	/// The decoded text of the node.
	override string nodeValue() const {
		return this.contents; //toString();
	}

	///.
	@property override int nodeType() const {
		return NodeType.Text;
	}

	/// Writes the entity-encoded contents to `where` and returns the encoded text.
	/// For empty contents nothing is written and an empty, non-null string is returned.
	override string writeToAppender(Appender!string where = appender!string()) const {
		string s;
		if(contents.length)
			s = htmlEntitiesEncode(contents, where);
		else
			s = "";

		assert(s !is null);
		return s;
	}

	/// Returns the entity-encoded text laid out for pretty printing.
	///
	/// Unless the parent is a `pre` element, runs of whitespace are collapsed into a single space first.
	/// When there is a parent and it is not a `p` element, the text is also stripped at both ends.
	///
	/// Params:
	///     insertComments = wrap the indentation that is inserted in `<!--` and `-->`
	///     indentationLevel = how many times `indentWith` is repeated in front of each line
	///     indentWith = the string that makes up one level of indentation
	override string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
		string s;

		string contents = this.contents;
		// we will first collapse the whitespace per html
		// sort of. note this can break stuff yo!!!!
		if(this.parentNode is null || this.parentNode.tagName != "pre") {
			string n = "";
			// starting out as "true" on an indented node drops its leading whitespace entirely
			bool lastWasWhitespace = indentationLevel > 0;
			foreach(char c; contents) {
				if(c.isSimpleWhite) {
					if(!lastWasWhitespace)
						n ~= ' ';
					lastWasWhitespace = true;
				} else {
					n ~= c;
					lastWasWhitespace = false;
				}
			}

			contents = n;
		}

		if(this.parentNode !is null && this.parentNode.tagName != "p") {
			contents = contents.strip;
		}

		auto e = htmlEntitiesEncode(contents);
		import std.algorithm.iteration : splitter;
		bool first = true;
		foreach(line; splitter(e, "\n")) {
			if(first) {
				s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
				first = false;
			} else {
				s ~= "\n";
				if(insertComments)
					s ~= "<!--";
				// indent the following lines with the caller's indentWith, just like the
				// first line; this used to be a hard-coded "\t" that ignored the parameter
				foreach(i; 0 .. indentationLevel)
					s ~= indentWith;
				if(insertComments)
					s ~= "-->";
			}
			s ~= line.stripRight;
		}
		return s;
	}

	/// Not supported: this always fails an assertion.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a text node");
	}

	/// The decoded text held by this node.
	string contents;
	// alias contents content; // I just mistype this a lot,
}

/**
	There are subclasses of Element offering improved helper
	functions for the element in HTML.
*/

///.
/// Group: implementations
class Link : Element {

	///.
this(Document _parentDocument) {
		super(_parentDocument);
		this.tagName = "a";
	}


	/// Creates an `a` element whose `href` attribute is `href` and whose inner text is `text`.
	this(string href, string text) {
		super("a");
		setAttribute("href", href);
		innerText = text;
	}
	/+
	/// Returns everything in the href EXCEPT the query string
	@property string targetSansQuery() {

	}

	///.
	@property string domainName() {

	}

	///.
	@property string path
	+/
	/// This gets a variable from the URL's query string.
	/// Returns: the decoded value, or `null` if the href has no variable called `name`.
	string getValue(string name) {
		auto vars = variablesHash();
		if(auto found = name in vars)
			return *found;
		return null;
	}

	// Parses the query string of the href into a name => value table, URL-decoding both.
	// A variable without '=' gets an empty value. Returns null if there is no href attribute.
	private string[string] variablesHash() {
		string href = getAttribute("href");
		if(href is null)
			return null;

		// The fragment is never part of the query. Drop it before looking for the '?',
		// so a '?' that is inside the fragment isn't mistaken for the start of the query.
		auto fragment = href.indexOf("#");
		if(fragment != -1)
			href = href[0 .. fragment];

		auto ques = href.indexOf("?");
		string str = "";
		if(ques != -1)
			str = href[ques + 1 .. $];

		string[] variables = str.split("&");

		string[string] hash;

		foreach(var; variables) {
			auto index = var.indexOf("=");
			if(index == -1) {
				// decode the bare name too; updateQueryString encodes every name when it
				// writes them back, so a raw key here would end up double-encoded
				hash[decodeComponent(var)] = "";
			} else {
				hash[decodeComponent(var[0..index])] = decodeComponent(var[index + 1 .. $]);
			}
		}

		return hash;
	}

	/// Replaces the query string of the href with the given variables, URL-encoding the
	/// names and values. Everything before the `?` and any `#fragment` are kept.
	/// A variable with an empty value is written as just its name.
	/*private*/ void updateQueryString(string[string] vars) {
		string href = getAttribute("href");

		// Take the fragment off first. It comes after the query in a URL, so cutting at
		// the '?' before doing this would throw the fragment away along with the old query.
		string frag = "";
		auto fragment = href.indexOf("#");
		if(fragment != -1) {
			frag = href[fragment..$];
			href = href[0..fragment];
		}

		auto question = href.indexOf("?");
		if(question != -1)
			href = href[0..question];

		string query = "?";
		bool first = true;
		foreach(name, value; vars) {
			if(!first)
				query ~= "&";
			else
				first = false;

			query ~= encodeComponent(name);
			if(value.length)
				query ~= "=" ~ encodeComponent(value);
		}

		// with no variables at all, don't leave a lone '?' behind
		if(query != "?")
			href ~= query;

		href ~= frag;

		setAttribute("href", href);
	}

	/// Sets or adds the variable with the given name to the given value
	/// It automatically URI encodes the values and takes care of the ? and &.
	override void setValue(string name, string variable) {
		auto vars = variablesHash();
		vars[name] = variable;

		updateQueryString(vars);
	}

	/// Removes the given variable from the query string
	void removeValue(string name) {
		auto vars = variablesHash();
		vars.remove(name);

		updateQueryString(vars);
	}

	/*
	///.
	override string toString() {

	}

	///.
	override string getAttribute(string name) {
		if(name == "href") {

		} else
			return super.getAttribute(name);
	}
	*/
}

///.
/// Group: implementations
class Form : Element {

	///.
6119 this(Document _parentDocument) { 6120 super(_parentDocument); 6121 tagName = "form"; 6122 } 6123 6124 override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 6125 auto t = this.querySelector("fieldset div"); 6126 if(t is null) 6127 return super.addField(label, name, type, fieldOptions); 6128 else 6129 return t.addField(label, name, type, fieldOptions); 6130 } 6131 6132 override Element addField(string label, string name, FormFieldOptions fieldOptions) { 6133 auto type = "text"; 6134 auto t = this.querySelector("fieldset div"); 6135 if(t is null) 6136 return super.addField(label, name, type, fieldOptions); 6137 else 6138 return t.addField(label, name, type, fieldOptions); 6139 } 6140 6141 override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) { 6142 auto t = this.querySelector("fieldset div"); 6143 if(t is null) 6144 return super.addField(label, name, options, fieldOptions); 6145 else 6146 return t.addField(label, name, options, fieldOptions); 6147 } 6148 6149 override void setValue(string field, string value) { 6150 setValue(field, value, true); 6151 } 6152 6153 // FIXME: doesn't handle arrays; multiple fields can have the same name 6154 6155 /// Set's the form field's value. For input boxes, this sets the value attribute. For 6156 /// textareas, it sets the innerText. For radio boxes and select boxes, it removes 6157 /// the checked/selected attribute from all, and adds it to the one matching the value. 6158 /// For checkboxes, if the value is non-null and not empty, it checks the box. 6159 6160 /// If you set a value that doesn't exist, it throws an exception if makeNew is false. 6161 /// Otherwise, it makes a new input with type=hidden to keep the value. 
6162 void setValue(string field, string value, bool makeNew) { 6163 auto eles = getField(field); 6164 if(eles.length == 0) { 6165 if(makeNew) { 6166 addInput(field, value); 6167 return; 6168 } else 6169 throw new Exception("form field does not exist"); 6170 } 6171 6172 if(eles.length == 1) { 6173 auto e = eles[0]; 6174 switch(e.tagName) { 6175 default: assert(0); 6176 case "textarea": 6177 e.innerText = value; 6178 break; 6179 case "input": 6180 string type = e.getAttribute("type"); 6181 if(type is null) { 6182 e.value = value; 6183 return; 6184 } 6185 switch(type) { 6186 case "checkbox": 6187 case "radio": 6188 if(value.length && value != "false") 6189 e.setAttribute("checked", "checked"); 6190 else 6191 e.removeAttribute("checked"); 6192 break; 6193 default: 6194 e.value = value; 6195 return; 6196 } 6197 break; 6198 case "select": 6199 bool found = false; 6200 foreach(child; e.tree) { 6201 if(child.tagName != "option") 6202 continue; 6203 string val = child.getAttribute("value"); 6204 if(val is null) 6205 val = child.innerText; 6206 if(val == value) { 6207 child.setAttribute("selected", "selected"); 6208 found = true; 6209 } else 6210 child.removeAttribute("selected"); 6211 } 6212 6213 if(!found) { 6214 e.addChild("option", value) 6215 .setAttribute("selected", "selected"); 6216 } 6217 break; 6218 } 6219 } else { 6220 // assume radio boxes 6221 foreach(e; eles) { 6222 string val = e.getAttribute("value"); 6223 //if(val is null) 6224 // throw new Exception("don't know what to do with radio boxes with null value"); 6225 if(val == value) 6226 e.setAttribute("checked", "checked"); 6227 else 6228 e.removeAttribute("checked"); 6229 } 6230 } 6231 } 6232 6233 /// This takes an array of strings and adds hidden <input> elements for each one of them. Unlike setValue, 6234 /// it makes no attempt to find and modify existing elements in the form to the new values. 
6235 void addValueArray(string key, string[] arrayOfValues) { 6236 foreach(arr; arrayOfValues) 6237 addChild("input", key, arr); 6238 } 6239 6240 /// Gets the value of the field; what would be given if it submitted right now. (so 6241 /// it handles select boxes and radio buttons too). For checkboxes, if a value isn't 6242 /// given, but it is checked, it returns "checked", since null and "" are indistinguishable 6243 string getValue(string field) { 6244 auto eles = getField(field); 6245 if(eles.length == 0) 6246 return ""; 6247 if(eles.length == 1) { 6248 auto e = eles[0]; 6249 switch(e.tagName) { 6250 default: assert(0); 6251 case "input": 6252 if(e.type == "checkbox") { 6253 if(e.checked) 6254 return e.value.length ? e.value : "checked"; 6255 return ""; 6256 } else 6257 return e.value; 6258 case "textarea": 6259 return e.innerText; 6260 case "select": 6261 foreach(child; e.tree) { 6262 if(child.tagName != "option") 6263 continue; 6264 if(child.selected) 6265 return child.value; 6266 } 6267 break; 6268 } 6269 } else { 6270 // assuming radio 6271 foreach(e; eles) { 6272 if(e.checked) 6273 return e.value; 6274 } 6275 } 6276 6277 return ""; 6278 } 6279 6280 // FIXME: doesn't handle multiple elements with the same name (except radio buttons) 6281 ///. 
6282 string getPostableData() { 6283 bool[string] namesDone; 6284 6285 string ret; 6286 bool outputted = false; 6287 6288 foreach(e; getElementsBySelector("[name]")) { 6289 if(e.name in namesDone) 6290 continue; 6291 6292 if(outputted) 6293 ret ~= "&"; 6294 else 6295 outputted = true; 6296 6297 ret ~= std.uri.encodeComponent(e.name) ~ "=" ~ std.uri.encodeComponent(getValue(e.name)); 6298 6299 namesDone[e.name] = true; 6300 } 6301 6302 return ret; 6303 } 6304 6305 /// Gets the actual elements with the given name 6306 Element[] getField(string name) { 6307 Element[] ret; 6308 foreach(e; tree) { 6309 if(e.name == name) 6310 ret ~= e; 6311 } 6312 return ret; 6313 } 6314 6315 /// Grabs the <label> with the given for tag, if there is one. 6316 Element getLabel(string forId) { 6317 foreach(e; tree) 6318 if(e.tagName == "label" && e.getAttribute("for") == forId) 6319 return e; 6320 return null; 6321 } 6322 6323 /// Adds a new INPUT field to the end of the form with the given attributes. 6324 Element addInput(string name, string value, string type = "hidden") { 6325 auto e = new Element(parentDocument, "input", null, true); 6326 e.name = name; 6327 e.value = value; 6328 e.type = type; 6329 6330 appendChild(e); 6331 6332 return e; 6333 } 6334 6335 /// Removes the given field from the form. It finds the element and knocks it right out. 6336 void removeField(string name) { 6337 foreach(e; getField(name)) 6338 e.parentNode.removeChild(e); 6339 } 6340 6341 /+ 6342 /// Returns all form members. 6343 @property Element[] elements() { 6344 6345 } 6346 6347 ///. 
6348 string opDispatch(string name)(string v = null) 6349 // filter things that should actually be attributes on the form 6350 if( name != "method" && name != "action" && name != "enctype" 6351 && name != "style" && name != "name" && name != "id" && name != "class") 6352 { 6353 6354 } 6355 +/ 6356 /+ 6357 void submit() { 6358 // take its elements and submit them through http 6359 } 6360 +/ 6361 } 6362 6363 import std.conv; 6364 6365 ///. 6366 /// Group: implementations 6367 class Table : Element { 6368 6369 ///. 6370 this(Document _parentDocument) { 6371 super(_parentDocument); 6372 tagName = "table"; 6373 } 6374 6375 /// Creates an element with the given type and content. 6376 Element th(T)(T t) { 6377 Element e; 6378 if(parentDocument !is null) 6379 e = parentDocument.createElement("th"); 6380 else 6381 e = Element.make("th"); 6382 static if(is(T == Html)) 6383 e.innerHTML = t; 6384 else 6385 e.innerText = to!string(t); 6386 return e; 6387 } 6388 6389 /// ditto 6390 Element td(T)(T t) { 6391 Element e; 6392 if(parentDocument !is null) 6393 e = parentDocument.createElement("td"); 6394 else 6395 e = Element.make("td"); 6396 static if(is(T == Html)) 6397 e.innerHTML = t; 6398 else 6399 e.innerText = to!string(t); 6400 return e; 6401 } 6402 6403 /// . 6404 Element appendHeaderRow(T...)(T t) { 6405 return appendRowInternal("th", "thead", t); 6406 } 6407 6408 /// . 6409 Element appendFooterRow(T...)(T t) { 6410 return appendRowInternal("td", "tfoot", t); 6411 } 6412 6413 /// . 6414 Element appendRow(T...)(T t) { 6415 return appendRowInternal("td", "tbody", t); 6416 } 6417 6418 void addColumnClasses(string[] classes...) 
{ 6419 auto grid = getGrid(); 6420 foreach(row; grid) 6421 foreach(i, cl; classes) { 6422 if(cl.length) 6423 if(i < row.length) 6424 row[i].addClass(cl); 6425 } 6426 } 6427 6428 private Element appendRowInternal(T...)(string innerType, string findType, T t) { 6429 Element row = Element.make("tr"); 6430 6431 foreach(e; t) { 6432 static if(is(typeof(e) : Element)) { 6433 if(e.tagName == "td" || e.tagName == "th") 6434 row.appendChild(e); 6435 else { 6436 Element a = Element.make(innerType); 6437 6438 a.appendChild(e); 6439 6440 row.appendChild(a); 6441 } 6442 } else static if(is(typeof(e) == Html)) { 6443 Element a = Element.make(innerType); 6444 a.innerHTML = e.source; 6445 row.appendChild(a); 6446 } else static if(is(typeof(e) == Element[])) { 6447 Element a = Element.make(innerType); 6448 foreach(ele; e) 6449 a.appendChild(ele); 6450 row.appendChild(a); 6451 } else static if(is(typeof(e) == string[])) { 6452 foreach(ele; e) { 6453 Element a = Element.make(innerType); 6454 a.innerText = to!string(ele); 6455 row.appendChild(a); 6456 } 6457 } else { 6458 Element a = Element.make(innerType); 6459 a.innerText = to!string(e); 6460 row.appendChild(a); 6461 } 6462 } 6463 6464 foreach(e; children) { 6465 if(e.tagName == findType) { 6466 e.appendChild(row); 6467 return row; 6468 } 6469 } 6470 6471 // the type was not found if we are here... let's add it so it is well-formed 6472 auto lol = this.addChild(findType); 6473 lol.appendChild(row); 6474 6475 return row; 6476 } 6477 6478 ///. 6479 Element captionElement() { 6480 Element cap; 6481 foreach(c; children) { 6482 if(c.tagName == "caption") { 6483 cap = c; 6484 break; 6485 } 6486 } 6487 6488 if(cap is null) { 6489 cap = Element.make("caption"); 6490 appendChild(cap); 6491 } 6492 6493 return cap; 6494 } 6495 6496 ///. 6497 @property string caption() { 6498 return captionElement().innerText; 6499 } 6500 6501 ///. 
6502 @property void caption(string text) { 6503 captionElement().innerText = text; 6504 } 6505 6506 /// Gets the logical layout of the table as a rectangular grid of 6507 /// cells. It considers rowspan and colspan. A cell with a large 6508 /// span is represented in the grid by being referenced several times. 6509 /// The tablePortition parameter can get just a <thead>, <tbody>, or 6510 /// <tfoot> portion if you pass one. 6511 /// 6512 /// Note: the rectangular grid might include null cells. 6513 /// 6514 /// This is kinda expensive so you should call once when you want the grid, 6515 /// then do lookups on the returned array. 6516 TableCell[][] getGrid(Element tablePortition = null) 6517 in { 6518 if(tablePortition is null) 6519 assert(tablePortition is null); 6520 else { 6521 assert(tablePortition !is null); 6522 assert(tablePortition.parentNode is this); 6523 assert( 6524 tablePortition.tagName == "tbody" 6525 || 6526 tablePortition.tagName == "tfoot" 6527 || 6528 tablePortition.tagName == "thead" 6529 ); 6530 } 6531 } 6532 body { 6533 if(tablePortition is null) 6534 tablePortition = this; 6535 6536 TableCell[][] ret; 6537 6538 // FIXME: will also return rows of sub tables! 6539 auto rows = tablePortition.getElementsByTagName("tr"); 6540 ret.length = rows.length; 6541 6542 int maxLength = 0; 6543 6544 int insertCell(int row, int position, TableCell cell) { 6545 if(row >= ret.length) 6546 return position; // not supposed to happen - a rowspan is prolly too big. 6547 6548 if(position == -1) { 6549 position++; 6550 foreach(item; ret[row]) { 6551 if(item is null) 6552 break; 6553 position++; 6554 } 6555 } 6556 6557 if(position < ret[row].length) 6558 ret[row][position] = cell; 6559 else 6560 foreach(i; ret[row].length .. 
position + 1) { 6561 if(i == position) 6562 ret[row] ~= cell; 6563 else 6564 ret[row] ~= null; 6565 } 6566 return position; 6567 } 6568 6569 foreach(i, rowElement; rows) { 6570 auto row = cast(TableRow) rowElement; 6571 assert(row !is null); 6572 assert(i < ret.length); 6573 6574 int position = 0; 6575 foreach(cellElement; rowElement.childNodes) { 6576 auto cell = cast(TableCell) cellElement; 6577 if(cell is null) 6578 continue; 6579 6580 // FIXME: colspan == 0 or rowspan == 0 6581 // is supposed to mean fill in the rest of 6582 // the table, not skip it 6583 foreach(int j; 0 .. cell.colspan) { 6584 foreach(int k; 0 .. cell.rowspan) 6585 // if the first row, always append. 6586 insertCell(k + cast(int) i, k == 0 ? -1 : position, cell); 6587 position++; 6588 } 6589 } 6590 6591 if(ret[i].length > maxLength) 6592 maxLength = cast(int) ret[i].length; 6593 } 6594 6595 // want to ensure it's rectangular 6596 foreach(ref r; ret) { 6597 foreach(i; r.length .. maxLength) 6598 r ~= null; 6599 } 6600 6601 return ret; 6602 } 6603 } 6604 6605 /// Represents a table row element - a <tr> 6606 /// Group: implementations 6607 class TableRow : Element { 6608 ///. 6609 this(Document _parentDocument) { 6610 super(_parentDocument); 6611 tagName = "tr"; 6612 } 6613 6614 // FIXME: the standard says there should be a lot more in here, 6615 // but meh, I never use it and it's a pain to implement. 6616 } 6617 6618 /// Represents anything that can be a table cell - <td> or <th> html. 6619 /// Group: implementations 6620 class TableCell : Element { 6621 ///. 
6622 this(Document _parentDocument, string _tagName) { 6623 super(_parentDocument, _tagName); 6624 } 6625 6626 @property int rowspan() const { 6627 int ret = 1; 6628 auto it = getAttribute("rowspan"); 6629 if(it.length) 6630 ret = to!int(it); 6631 return ret; 6632 } 6633 6634 @property int colspan() const { 6635 int ret = 1; 6636 auto it = getAttribute("colspan"); 6637 if(it.length) 6638 ret = to!int(it); 6639 return ret; 6640 } 6641 6642 @property int rowspan(int i) { 6643 setAttribute("rowspan", to!string(i)); 6644 return i; 6645 } 6646 6647 @property int colspan(int i) { 6648 setAttribute("colspan", to!string(i)); 6649 return i; 6650 } 6651 6652 } 6653 6654 6655 ///. 6656 /// Group: implementations 6657 class MarkupException : Exception { 6658 6659 ///. 6660 this(string message, string file = __FILE__, size_t line = __LINE__) { 6661 super(message, file, line); 6662 } 6663 } 6664 6665 /// This is used when you are using one of the require variants of navigation, and no matching element can be found in the tree. 6666 /// Group: implementations 6667 class ElementNotFoundException : Exception { 6668 6669 /// type == kind of element you were looking for and search == a selector describing the search. 6670 this(string type, string search, Element searchContext, string file = __FILE__, size_t line = __LINE__) { 6671 this.searchContext = searchContext; 6672 super("Element of type '"~type~"' matching {"~search~"} not found.", file, line); 6673 } 6674 6675 Element searchContext; 6676 } 6677 6678 /// The html struct is used to differentiate between regular text nodes and html in certain functions 6679 /// 6680 /// Easiest way to construct it is like this: `auto html = Html("<p>hello</p>");` 6681 /// Group: core_functionality 6682 struct Html { 6683 /// This string holds the actual html. Use it to retrieve the contents. 
6684 string source; 6685 } 6686 6687 // for the observers 6688 enum DomMutationOperations { 6689 setAttribute, 6690 removeAttribute, 6691 appendChild, // tagname, attributes[], innerHTML 6692 insertBefore, 6693 truncateChildren, 6694 removeChild, 6695 appendHtml, 6696 replaceHtml, 6697 appendText, 6698 replaceText, 6699 replaceTextOnly 6700 } 6701 6702 // and for observers too 6703 struct DomMutationEvent { 6704 DomMutationOperations operation; 6705 Element target; 6706 Element related; // what this means differs with the operation 6707 Element related2; 6708 string relatedString; 6709 string relatedString2; 6710 } 6711 6712 6713 private immutable static string[] selfClosedElements = [ 6714 // html 4 6715 "img", "hr", "input", "br", "col", "link", "meta", 6716 // html 5 6717 "source" ]; 6718 6719 private immutable static string[] inlineElements = [ 6720 "span", "strong", "em", "b", "i", "a" 6721 ]; 6722 6723 6724 static import std.conv; 6725 6726 ///. 6727 int intFromHex(string hex) { 6728 int place = 1; 6729 int value = 0; 6730 for(sizediff_t a = hex.length - 1; a >= 0; a--) { 6731 int v; 6732 char q = hex[a]; 6733 if( q >= '0' && q <= '9') 6734 v = q - '0'; 6735 else if (q >= 'a' && q <= 'f') 6736 v = q - 'a' + 10; 6737 else throw new Exception("Illegal hex character: " ~ q); 6738 6739 value += v * place; 6740 6741 place *= 16; 6742 } 6743 6744 return value; 6745 } 6746 6747 6748 // CSS selector handling 6749 6750 // EXTENSIONS 6751 // dd - dt means get the dt directly before that dd (opposite of +) NOT IMPLEMENTED 6752 // dd -- dt means rewind siblings until you hit a dt, go as far as you need to NOT IMPLEMENTED 6753 // dt < dl means get the parent of that dt iff it is a dl (usable for "get a dt that are direct children of dl") 6754 // dt << dl means go as far up as needed to find a dl (you have an element and want its containers) NOT IMPLEMENTED 6755 // :first means to stop at the first hit, don't do more (so p + p == p ~ p:first 6756 6757 6758 6759 // CSS4 
// draft currently says you can change the subject (the element actually returned) by putting a ! at the end of it.
// That might be useful to implement, though I do have parent selectors too.

/// The punctuation tokens recognized by the selector lexer.
static immutable string[] selectorTokens = [
	// It is important that the 2 character possibilities go first here for accurate lexing
	"~=", "*=", "|=", "^=", "$=", "!=", // "::" should be there too for full standard
	"::", ">>",
	"<<", // my any-parent extension (reciprocal of whitespace)
	// " - ", // previous-sibling extension (whitespace required to disambiguate tag-names)
	".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<", "(", ")"
]; // other is white space or a name.

/// Identifies the token starting at `str[position]`.
/// Returns: its index in [selectorTokens], or -1 if no token starts there.
sizediff_t idToken(string str, sizediff_t position) {
	sizediff_t tid = -1;
	char c = str[position];
	foreach(a, token; selectorTokens)

		if(c == token[0]) {
			if(token.length > 1) {
				if(position + 1 >= str.length || str[position+1] != token[1])
					continue; // not this token
			}
			tid = a;
			break;
		}
	return tid;
}

/++
	Splits a CSS selector into tokens.

	Punctuation from [selectorTokens] (including a single space, which is
	significant in CSS) becomes a token of its own. Quoted strings become one
	token holding the unquoted text, with `\"`, `\'` and `\\` unescaped.
	Everything else is an identifier; a backslash inside an identifier makes
	the next character part of it. Comments (`/* ... */`) and control
	characters below the space are skipped.

	Throws: `Exception` if a quoted string is never closed.
+/
// look, ma, no phobos!
// new lexer by ketmar
string[] lexSelector (string selstr) {

	static sizediff_t idToken (string str, size_t stpos) {
		char c = str[stpos];
		foreach (sizediff_t tidx, immutable token; selectorTokens) {
			if (c == token[0]) {
				if (token.length > 1) {
					assert(token.length == 2, token); // we don't have 3-char tokens yet
					if (str.length-stpos < 2 || str[stpos+1] != token[1]) continue;
				}
				return tidx;
			}
		}
		return -1;
	}

	// skip spaces and comments
	static string removeLeadingBlanks (string str) {
		size_t curpos = 0;
		while (curpos < str.length) {
			immutable char ch = str[curpos];
			// this can overflow on 4GB strings on 32-bit; 'cmon, don't be silly, nobody cares!
			if (ch == '/' && str.length-curpos > 1 && str[curpos+1] == '*') {
				// comment
				curpos += 2;
				while (curpos < str.length) {
					if (str[curpos] == '*' && str.length-curpos > 1 && str[curpos+1] == '/') {
						curpos += 2;
						break;
					}
					++curpos;
				}
			} else if (ch < 32) { // The < instead of <= is INTENTIONAL. See note from adr below.
				++curpos;

				// FROM ADR: This does NOT catch ' '! Spaces have semantic meaning in CSS! While
				// "foo bar" is clear, and can only have one meaning, consider ".foo .bar".
				// That is not the same as ".foo.bar". If the space is stripped, important
				// information is lost, despite the tokens being separatable anyway.
				//
				// The parser really needs to be aware of the presence of a space.
			} else {
				break;
			}
		}
		return str[curpos..$];
	}

	static bool isBlankAt() (string str, size_t pos) {
		// we should consider unicode spaces too, but... unicode sux anyway.
		return
			(pos < str.length && // in string
			 (str[pos] <= 32 || // space
			  (str.length-pos > 1 && str[pos] == '/' && str[pos+1] == '*'))); // comment
	}

	string[] tokens;
	// lexx it!
	while ((selstr = removeLeadingBlanks(selstr)).length > 0) {
		if(selstr[0] == '\"' || selstr[0] == '\'') {
			immutable char end = selstr[0];
			size_t pos = 1;
			string content = "";
			bool closed = false;

			// Single pass: an escaped quote or backslash is taken literally and
			// can never end the string; any other backslash is kept as written.
			while(pos < selstr.length) {
				immutable char ch = selstr[pos];
				if(ch == '\\' && pos + 1 < selstr.length
					&& (selstr[pos + 1] == '"' || selstr[pos + 1] == '\'' || selstr[pos + 1] == '\\'))
				{
					content ~= selstr[pos + 1];
					pos += 2;
				} else if(ch == end) {
					closed = true;
					break;
				} else {
					content ~= ch;
					pos++;
				}
			}

			if(!closed)
				throw new Exception("unterminated string in selector: " ~ selstr);

			tokens ~= content;
			// the closing quote may legitimately be the last character
			selstr = selstr[pos + 1 .. $];
			continue;
		}


		// no tokens starts with escape
		immutable tid = idToken(selstr, 0);
		if (tid >= 0) {
			// special token
			tokens ~= selectorTokens[tid]; // it's funnier this way
			selstr = selstr[selectorTokens[tid].length..$];
			continue;
		}
		// from start to space or special token
		size_t escapePos = size_t.max;
		size_t curpos = 0; // i can has chizburger^w escape at the start
		while (curpos < selstr.length) {
			if (selstr[curpos] == '\\') {
				// this is escape, just skip it and next char
				if (escapePos == size_t.max) escapePos = curpos;
				curpos = (selstr.length-curpos >= 2 ? curpos+2 : selstr.length);
			} else {
				if (isBlankAt(selstr, curpos) || idToken(selstr, curpos) >= 0) break;
				++curpos;
			}
		}
		// identifier
		if (escapePos != size_t.max) {
			// i hate it when it happens
			string id = selstr[0..escapePos];
			while (escapePos < curpos) {
				if (curpos-escapePos < 2) break;
				id ~= selstr[escapePos+1]; // escaped char
				escapePos += 2;
				immutable stp = escapePos;
				while (escapePos < curpos && selstr[escapePos] != '\\') ++escapePos;
				if (escapePos > stp) id ~= selstr[stp..escapePos];
			}
			if (id.length > 0) tokens ~= id;
		} else {
			tokens ~= selstr[0..curpos];
		}
		selstr = selstr[curpos..$];
	}
	return tokens;
}
version(unittest_domd_lexer) unittest {
	// a space is a token of its own: it is the descendant combinator
	assert(lexSelector(r" test\=me  /*d*/") == [" ", r"test=me", " ", " "]);
	assert(lexSelector(r"div/**/. id") == ["div", ".", " ", "id"]);
	assert(lexSelector(r" < <") == [" ", "<", " ", "<"]);
	assert(lexSelector(r" <<") == [" ", "<<"]);
	assert(lexSelector(r" <</") == [" ", "<<", "/"]);
	assert(lexSelector(r" <</*") == [" ", "<<"]);
	assert(lexSelector(r" <\</*") == [" ", "<", "<"]);
	assert(lexSelector(r"heh\") == ["heh"]);
	assert(lexSelector(r"alice \") == ["alice", " "]);
	assert(lexSelector(r"alice,is#best") == ["alice", ",", "is", "#", "best"]);
	// quoted strings, including escapes and a quote that ends the input
	assert(lexSelector(`[a="b c"]`) == ["[", "a", "=", "b c", "]"]);
	assert(lexSelector(`[a="x\"y"]`) == ["[", "a", "=", `x"y`, "]"]);
	assert(lexSelector(`'end'`) == ["end"]);
}

/// One compound selector (a tag filter plus its attribute and pseudo-class
/// conditions) together with the combinator that precedes it.
struct SelectorPart {
	string tagNameFilter; /// tag name to match; "" or "*" matches any tag
	string[] attributesPresent; /// [attr]
	string[2][] attributesEqual; /// [attr=value]
	string[2][] attributesStartsWith; /// [attr^=value]
	string[2][] attributesEndsWith; /// [attr$=value]
	// split it on space, then match to these
	string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value]
	// split it on dash, then match to these
	string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value]
	string[2][] attributesInclude; /// [attr*=value]
	string[2][] attributesNotEqual; /// [attr!=value] -- extension by me

	string[] hasSelectors; /// :has(this)
	string[] notSelectors; /// :not(this)

	ParsedNth[] nthOfType; /// :nth-of-type(this)
	ParsedNth[] nthLastOfType; /// :nth-last-of-type(this)
	ParsedNth[] nthChild; /// :nth-child(this)

	bool firstChild; /// :first-child
	bool lastChild; /// :last-child

	bool firstOfType; /// :first-of-type
	bool lastOfType; /// :last-of-type

	bool emptyElement; /// :empty
	bool whitespaceOnly; /// :whitespace-only
	bool oddChild; /// :odd-child
	bool evenChild; /// :even-child

	bool rootElement; /// :root

	int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf

	/// True if nothing but [separation] differs from a default-initialized part.
	bool isCleanSlateExceptSeparation() {
		auto cp = this;
		cp.separation = -1;
		return cp is SelectorPart.init;
	}

	/// Converts the part back to selector text, starting with its combinator.
6969 string toString() { 6970 string ret; 6971 switch(separation) { 6972 default: assert(0); 6973 case -1: break; 6974 case 0: ret ~= " "; break; 6975 case 1: ret ~= " > "; break; 6976 case 2: ret ~= " + "; break; 6977 case 3: ret ~= " ~ "; break; 6978 case 4: ret ~= " < "; break; 6979 } 6980 ret ~= tagNameFilter; 6981 foreach(a; attributesPresent) ret ~= "[" ~ a ~ "]"; 6982 foreach(a; attributesEqual) ret ~= "[" ~ a[0] ~ "=\"" ~ a[1] ~ "\"]"; 6983 foreach(a; attributesEndsWith) ret ~= "[" ~ a[0] ~ "$=\"" ~ a[1] ~ "\"]"; 6984 foreach(a; attributesStartsWith) ret ~= "[" ~ a[0] ~ "^=\"" ~ a[1] ~ "\"]"; 6985 foreach(a; attributesNotEqual) ret ~= "[" ~ a[0] ~ "!=\"" ~ a[1] ~ "\"]"; 6986 foreach(a; attributesInclude) ret ~= "[" ~ a[0] ~ "*=\"" ~ a[1] ~ "\"]"; 6987 foreach(a; attributesIncludesSeparatedByDashes) ret ~= "[" ~ a[0] ~ "|=\"" ~ a[1] ~ "\"]"; 6988 foreach(a; attributesIncludesSeparatedBySpaces) ret ~= "[" ~ a[0] ~ "~=\"" ~ a[1] ~ "\"]"; 6989 6990 foreach(a; notSelectors) ret ~= ":not(" ~ a ~ ")"; 6991 foreach(a; hasSelectors) ret ~= ":has(" ~ a ~ ")"; 6992 6993 foreach(a; nthChild) ret ~= ":nth-child(" ~ a.toString ~ ")"; 6994 foreach(a; nthOfType) ret ~= ":nth-of-type(" ~ a.toString ~ ")"; 6995 foreach(a; nthLastOfType) ret ~= ":nth-last-of-type(" ~ a.toString ~ ")"; 6996 6997 if(firstChild) ret ~= ":first-child"; 6998 if(lastChild) ret ~= ":last-child"; 6999 if(firstOfType) ret ~= ":first-of-type"; 7000 if(lastOfType) ret ~= ":last-of-type"; 7001 if(emptyElement) ret ~= ":empty"; 7002 if(whitespaceOnly) ret ~= ":whitespace-only"; 7003 if(oddChild) ret ~= ":odd-child"; 7004 if(evenChild) ret ~= ":even-child"; 7005 if(rootElement) ret ~= ":root"; 7006 7007 return ret; 7008 } 7009 7010 // USEFUL 7011 ///. 7012 bool matchElement(Element e) { 7013 // FIXME: this can be called a lot of times, and really add up in times according to the profiler. 7014 // Each individual call is reasonably fast already, but it adds up. 
7015 if(e is null) return false; 7016 if(e.nodeType != 1) return false; 7017 7018 if(tagNameFilter != "" && tagNameFilter != "*") 7019 if(e.tagName != tagNameFilter) 7020 return false; 7021 if(firstChild) { 7022 if(e.parentNode is null) 7023 return false; 7024 if(e.parentNode.childElements[0] !is e) 7025 return false; 7026 } 7027 if(lastChild) { 7028 if(e.parentNode is null) 7029 return false; 7030 auto ce = e.parentNode.childElements; 7031 if(ce[$-1] !is e) 7032 return false; 7033 } 7034 if(firstOfType) { 7035 if(e.parentNode is null) 7036 return false; 7037 auto ce = e.parentNode.childElements; 7038 foreach(c; ce) { 7039 if(c.tagName == e.tagName) { 7040 if(c is e) 7041 return true; 7042 else 7043 return false; 7044 } 7045 } 7046 } 7047 if(lastOfType) { 7048 if(e.parentNode is null) 7049 return false; 7050 auto ce = e.parentNode.childElements; 7051 foreach_reverse(c; ce) { 7052 if(c.tagName == e.tagName) { 7053 if(c is e) 7054 return true; 7055 else 7056 return false; 7057 } 7058 } 7059 } 7060 if(emptyElement) { 7061 if(e.children.length) 7062 return false; 7063 } 7064 if(whitespaceOnly) { 7065 if(e.innerText.strip.length) 7066 return false; 7067 } 7068 if(rootElement) { 7069 if(e.parentNode !is null) 7070 return false; 7071 } 7072 if(oddChild || evenChild) { 7073 if(e.parentNode is null) 7074 return false; 7075 foreach(i, child; e.parentNode.childElements) { 7076 if(child is e) { 7077 if(oddChild && !(i&1)) 7078 return false; 7079 if(evenChild && (i&1)) 7080 return false; 7081 break; 7082 } 7083 } 7084 } 7085 7086 bool matchWithSeparator(string attr, string value, string separator) { 7087 foreach(s; attr.split(separator)) 7088 if(s == value) 7089 return true; 7090 return false; 7091 } 7092 7093 foreach(a; attributesPresent) 7094 if(a !in e.attributes) 7095 return false; 7096 foreach(a; attributesEqual) 7097 if(a[0] !in e.attributes || e.attributes[a[0]] != a[1]) 7098 return false; 7099 foreach(a; attributesNotEqual) 7100 // FIXME: maybe it should say null 
/++
	Parsed form of the argument to the `:nth-child()` family of pseudo-classes,
	i.e. the `An+B` expression with an optional trailing `of <selector>`.

	`multiplier` holds A, `adder` holds B and `of` holds the raw selector text
	that followed the `of` keyword (empty if there was none).
+/
struct ParsedNth {
	int multiplier;
	int adder;

	string of;

	/++
		Parses `text`, which is the content between the parentheses.

		Accepted forms are `odd`, `even`, a plain integer, and `An`, `An+B`,
		`An-B` where `A` may be omitted or be a bare sign (`n`, `-n`, `+n`).
		Any of those may be followed by `of selector`.

		Throws: Exception if something other than `+`, `-` or `of` follows the `n`.
	+/
	this(string text) {
		auto original = text;
		consumeWhitespace(text);
		if(text.startsWith("odd")) {
			// odd is shorthand for 2n+1
			multiplier = 2;
			adder = 1;

			text = text[3 .. $];
		} else if(text.startsWith("even")) {
			// even is shorthand for 2n+0 (it used to be parsed identically to odd)
			multiplier = 2;
			adder = 0;

			text = text[4 .. $];
		} else {
			int n;
			if(text.length && text[0] == 'n') {
				n = 1;
			} else if(text.length >= 2 && (text[0] == '-' || text[0] == '+') && text[1] == 'n') {
				// a bare sign in front of n means a coefficient of -1 or +1;
				// parseNumber would see no digits and report 0 here
				n = (text[0] == '-') ? -1 : 1;
				text = text[1 .. $];
			} else {
				n = parseNumber(text);
			}
			consumeWhitespace(text);
			if(text.length && text[0] == 'n') {
				multiplier = n;
				text = text[1 .. $];
				consumeWhitespace(text);
				if(text.length) {
					if(text[0] == '+') {
						text = text[1 .. $];
						adder = parseNumber(text);
					} else if(text[0] == '-') {
						text = text[1 .. $];
						adder = -parseNumber(text);
					} else if(text[0] == 'o') {
						// start of "of"; handled below
					} else
						throw new Exception("invalid css string at " ~ text ~ " in " ~ original);
				}
			} else {
				// no n at all: the number is the exact (1-based) index
				adder = n;
			}
		}

		consumeWhitespace(text);
		if(text.startsWith("of")) {
			text = text[2 .. $];
			consumeWhitespace(text);
			of = text[0 .. $];
		}
	}

	/// Renders the parsed expression back as `An+B[ of selector]`.
	string toString() {
		return format("%dn%s%d%s%s", multiplier, adder >= 0 ? "+" : "", adder, of.length ? " of " : "", of);
	}

	/++
		Returns true if `e` sits at a position among `elements` that satisfies
		`multiplier * n + adder` for some integer `n >= 0`.

		Positions are 1-based. When `of` is set, only the elements matching that
		selector are counted (and `e` itself has to match it to be found).
	+/
	bool solvesFor(R)(R elements, Element e) {
		// the filter selector does not change per element, so parse it just once
		Selector filter;
		if(of.length)
			filter = Selector(of);

		int idx = 1;
		bool found = false;
		foreach(ele; elements) {
			if(of.length && !filter.matchesElement(ele))
				continue;
			if(ele is e) {
				found = true;
				break;
			}
			idx++;
		}
		if(!found) return false;

		// multiplier * n + adder = idx
		// it matches if there is a solution for a non-negative integral n

		idx -= adder;
		if(multiplier == 0)
			return idx == 0;

		// the quotient is n itself; a negative n is not a valid solution,
		// otherwise e.g. "n+3" would also match the first two children
		return idx % multiplier == 0 && idx / multiplier >= 0;
	}

	// drops leading spaces from text
	private void consumeWhitespace(ref string text) {
		while(text.length && text[0] == ' ')
			text = text[1 .. $];
	}

	// Reads an optionally signed run of decimal digits off the front of text.
	// Returns 0 (after consuming any sign) if no digits are present.
	private int parseNumber(ref string text) {
		consumeWhitespace(text);
		if(text.length == 0) return 0;
		bool negative = text[0] == '-';
		if(text[0] == '+')
			text = text[1 .. $];
		if(negative) text = text[1 .. $];
		int i = 0;
		while(i < text.length && (text[i] >= '0' && text[i] <= '9'))
			i++;
		if(i == 0)
			return 0;
		int cool = to!int(text[0 .. i]);
		text = text[i .. $];
		return negative ? -cool : cool;
	}
}

// USEFUL
/++
	Recursively resolves a chain of selector parts starting from `start`.

	The first part is matched against candidates chosen by its `separation`
	value, and each match becomes the starting point for the remaining parts.
	An empty `parts` list yields `[start]`, which terminates the recursion.

	The result may contain duplicates; callers in this file pass it through
	`removeDuplicates`.
+/
Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts) {
	Element[] ret;
	if(!parts.length) {
		return [start]; // the null selector only matches the start point; it
				// is what terminates the recursion
	}

	auto part = parts[0];
	//writeln("checking ", part, " against ", start, " with ", part.separation);
	switch(part.separation) {
		default: assert(0);
		case -1:
		case 0: // tree
			foreach(e; start.tree) {
				if(part.separation == 0 && start is e)
					continue; // space doesn't match itself!
				if(part.matchElement(e)) {
					ret ~= getElementsBySelectorParts(e, parts[1..$]);
				}
			}
		break;
		case 1: // children
			foreach(e; start.childNodes) {
				if(part.matchElement(e)) {
					ret ~= getElementsBySelectorParts(e, parts[1..$]);
				}
			}
		break;
		case 2: // next-sibling
			// NOTE(review): e may be null when there is no next sibling; this
			// relies on SelectorPart.matchElement rejecting null - confirm
			auto e = start.nextSibling("*");
			if(part.matchElement(e))
				ret ~= getElementsBySelectorParts(e, parts[1..$]);
		break;
		case 3: // younger sibling
			auto tmp = start.parentNode;
			if(tmp !is null) {
				sizediff_t pos = -1;
				auto children = tmp.childElements;
				foreach(i, child; children) {
					if(child is start) {
						pos = i;
						break;
					}
				}
				assert(pos != -1);
				foreach(e; children[pos+1..$]) {
					if(part.matchElement(e))
						ret ~= getElementsBySelectorParts(e, parts[1..$]);
				}
			}
		break;
		case 4: // immediate parent node, an extension of mine to walk back up the tree
			auto e = start.parentNode;
			if(part.matchElement(e)) {
				ret ~= getElementsBySelectorParts(e, parts[1..$]);
			}
			/*
				Example of usefulness:

				Consider you have an HTML table. If you want to get all rows that have a th, you can do:

				table th < tr

				Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes
			*/
		break;
		case 5: // any parent node, another extension of mine to go up the tree (backward of the whitespace operator)
			/*
				Like with the < operator, this is best used to find some parent of a particular known element.

				Not implemented here: this case adds nothing to the result.
			*/
		break;
	}

	return ret;
}
/++
	A CSS selector in parsed form. Using this directly is optional; it is
	worthwhile when the same selector is applied many times, since the string
	is then parsed only once.

	See_Also:
		$(LIST
			* [Element.querySelector]
			* [Element.querySelectorAll]
			* [Element.matches]
			* [Element.closest]
			* [Document.querySelector]
			* [Document.querySelectorAll]
		)
+/
/// Group: core_functionality
struct Selector {
	SelectorComponent[] components;
	string original;

	/++
		Parses `cssSelector` into components and remembers the source text.
	+/
	this(string cssSelector) {
		components = parseSelectorString(cssSelector);
		original = cssSelector;
	}

	/++
		Tells whether `e` satisfies at least one of the comma-separated
		alternatives of this selector, evaluated relative to `relativeTo`.

		Combined with [std.algorithm.iteration.filter] this gives a lazy
		variant of [Element.querySelectorAll|querySelectorAll]:

		---
		Selector sel = Selector("foo > bar");
		auto lazySelectorRange = element.tree.filter!(e => sel.matchesElement(e));
		---
	+/
	bool matchesElement(Element e, Element relativeTo = null) {
		for(size_t i = 0; i < components.length; i++) {
			if(components[i].matchElement(e, relativeTo))
				return true;
		}
		return false;
	}

	/++
		Reciprocal of [Element.querySelectorAll]: collects the elements
		reachable from `start` that match, without duplicates.
	+/
	Element[] getMatchingElements(Element start) {
		Element[] collected;
		foreach(alternative; components) {
			collected ~= getElementsBySelectorParts(start, alternative.parts);
		}
		return removeDuplicates(collected);
	}

	/++
		Lazy counterpart of [getMatchingElements]. Take care when mutating
		the dom while iterating over the returned range.
	+/
	auto getMatchingElementsLazy(Element start, Element relativeTo = null) {
		import std.algorithm.iteration;
		return start.tree.filter!(a => this.matchesElement(a, relativeTo));
	}


	/// Gives back the exact string this selector was constructed from.
	string toString() {
		return original;
	}

	/++
		Builds a string from the parsed components, separated by ", ".

		The result may differ from the original text; it is mainly a
		debugging aid for now.
	+/
	string parsedToString() {
		string text;
		for(size_t i = 0; i < components.length; i++) {
			if(i != 0)
				text ~= ", ";
			text ~= components[i].toString();
		}
		return text;
	}
}

/// One comma-free alternative of a selector: an ordered list of parts.
struct SelectorComponent {
	/// The parts, in the order they appear in the selector text.
	SelectorPart[] parts;

	/// Concatenates the string form of every part.
	string toString() {
		string text;
		for(size_t i = 0; i < parts.length; i++)
			text ~= parts[i].toString();
		return text;
	}

	// USEFUL
	/// Finds the elements selected by this component starting at `start`, without duplicates.
	Element[] getElements(Element start) {
		auto found = getElementsBySelectorParts(start, parts);
		return removeDuplicates(found);
	}

	// USEFUL (but not implemented)
	/++
		Checks `e` against this component by walking the parts right to left,
		moving up or sideways through the tree according to each separator.

		If relativeTo == null, it assumes the root of the parent document.
		Reaching `relativeTo` during the walk makes the match fail.
	+/
	bool matchElement(Element e, Element relativeTo = null) {
		if(e is null)
			return false;

		auto effectiveParts = parts;
		if(parts.length && parts[0].separation > 0) {
			// the selector opens with a combinator (as in
			// document.querySelector("> body"), which implies html), so
			// put a "*" part in front of it to stand in for the root

			// there is probably a MUCH better way to do this.
			SelectorPart rootStandIn = SelectorPart.init;
			rootStandIn.tagNameFilter = "*";
			rootStandIn.separation = 0;
			effectiveParts = [rootStandIn] ~ parts;
		}

		Element where = e;
		int lastSeparation = -1;

		foreach_reverse(part; effectiveParts) {
			switch(lastSeparation) {
				case -1: // nothing to step over: test the current element
					if(!part.matchElement(where))
						return false;
				break;
				case 0: // generic parent: any ancestor may match
					for(where = where.parentNode; where !is null; where = where.parentNode) {
						if(part.matchElement(where))
							break;
						if(where is relativeTo)
							return false;
					}
					if(where is null)
						return false;
				break;
				case 1: // the > operator
					where = where.parentNode;
					if(!part.matchElement(where))
						return false;
				break;
				case 2: // the + operator
					where = where.previousSibling("*");
					if(!part.matchElement(where))
						return false;
				break;
				case 3: // the ~ operator: any earlier sibling may match
					for(where = where.previousSibling("*"); where !is null; where = where.previousSibling("*")) {
						if(part.matchElement(where))
							break;
						if(where is relativeTo)
							return false;
					}
					if(where is null)
						return false;
				break;
				case 4: // my bad idea extension < operator, don't use this anymore
					// FIXME
				break;
				default:
					// any other separation is ignored here
				break;
			}

			lastSeparation = part.separation;

			// at end of line, if we aren't done by now, the match fails
			if(where is relativeTo)
				return false;
		}

		return true; // every part was satisfied
	}

	// the string should NOT have commas. Use parseSelectorString for that instead
	/// Lexes and parses a single, comma-free selector.
	static SelectorComponent fromString(string selector) {
		auto tokens = lexSelector(selector);
		return parseSelector(tokens);
	}
}
/++
	Lexes `selector` and parses every comma-separated alternative in it.

	Empty alternatives (for example from a leading or doubled comma) are
	skipped. `caseSensitiveTags` is forwarded to [parseSelector].
+/
SelectorComponent[] parseSelectorString(string selector, bool caseSensitiveTags = true) {
	SelectorComponent[] ret;
	auto tokens = lexSelector(selector); // this will parse commas too
	// and now do comma-separated slices (i haz phobosophobia!)
	while (tokens.length > 0) {
		size_t end = 0;
		while (end < tokens.length && tokens[end] != ",") ++end;
		if (end > 0) ret ~= parseSelector(tokens[0..end], caseSensitiveTags);
		if (tokens.length-end < 2) break;
		tokens = tokens[end+1..$];
	}
	return ret;
}

/++
	Parses the tokens of one comma-free selector into a [SelectorComponent].

	This is a small state machine over the token list. When
	`caseSensitiveTags` is false, tag name tokens are lowercased.

	Throws: Exception on a functional pseudo-class that is not followed by
	`(`, on an operator token that is not valid at that position, and on an
	unknown attribute comparison operator.
+/
SelectorComponent parseSelector(string[] tokens, bool caseSensitiveTags = true) {
	SelectorComponent s;

	SelectorPart current;
	void commit() {
		// might as well skip null items
		if(!current.isCleanSlateExceptSeparation()) {
			s.parts ~= current;
			current = current.init; // start right over
		}
	}
	enum State {
		Starting,
		ReadingClass,
		ReadingId,
		ReadingAttributeSelector,
		ReadingAttributeComparison,
		ExpectingAttributeCloser,
		ReadingPseudoClass,
		ReadingAttributeValue,

		SkippingFunctionalSelector,
	}
	State state = State.Starting;
	string attributeName, attributeValue, attributeComparison;
	int parensCount;
	foreach(idx, token; tokens) {
		// Returns the concatenated tokens between the parentheses that follow
		// the current token. The main loop then skips over those same tokens
		// in State.SkippingFunctionalSelector.
		string readFunctionalSelector() {
			// the name may be the very last token, e.g. "a:not"
			if(idx + 1 >= tokens.length || tokens[idx + 1] != "(")
				throw new Exception("parse error");
			string text;
			int pc = 1;
			foreach(t; tokens[idx + 2 .. $]) {
				if(t == "(")
					pc++;
				if(t == ")")
					pc--;
				if(pc == 0)
					break;
				text ~= t;
			}

			return text;
		}

		// index into selectorTokens, or -1 if this is an ordinary identifier
		sizediff_t tid = -1;
		foreach(i, item; selectorTokens)
			if(token == item) {
				tid = i;
				break;
			}
		final switch(state) {
			case State.Starting: // fresh, might be reading an operator or a tagname
				if(tid == -1) {
					if(!caseSensitiveTags)
						token = token.toLower();

					if(current.isCleanSlateExceptSeparation()) {
						current.tagNameFilter = token;
						// default thing, see comment under "*" below
						if(current.separation == -1) current.separation = 0;
					} else {
						// if it was already set, we must see two thingies
						// separated by whitespace...
						commit();
						current.separation = 0; // tree
						current.tagNameFilter = token;
					}
				} else {
					// Selector operators
					switch(token) {
						case "*":
							current.tagNameFilter = "*";
							// the idea here is if we haven't actually set a separation
							// yet (e.g. the > operator), it should assume the generic
							// whitespace (descendant) mode to avoid matching self with -1
							if(current.separation == -1) current.separation = 0;
						break;
						case " ":
							// If some other separation has already been set,
							// this is irrelevant whitespace, so we should skip it.
							// this happens in the case of "foo > bar" for example.
							if(current.isCleanSlateExceptSeparation() && current.separation > 0)
								continue;
							commit();
							current.separation = 0; // tree
						break;
						case ">>":
							commit();
							current.separation = 0; // alternate syntax for tree from html5 css
						break;
						case ">":
							commit();
							current.separation = 1; // child
						break;
						case "+":
							commit();
							current.separation = 2; // sibling directly after
						break;
						case "~":
							commit();
							current.separation = 3; // any sibling after
						break;
						case "<":
							commit();
							current.separation = 4; // immediate parent of
						break;
						case "[":
							state = State.ReadingAttributeSelector;
							if(current.separation == -1) current.separation = 0;
						break;
						case ".":
							state = State.ReadingClass;
							if(current.separation == -1) current.separation = 0;
						break;
						case "#":
							state = State.ReadingId;
							if(current.separation == -1) current.separation = 0;
						break;
						case ":":
						case "::":
							state = State.ReadingPseudoClass;
							if(current.separation == -1) current.separation = 0;
						break;

						default:
							// a known token that makes no sense here (stray ")", "]", "=" ...);
							// selectors are often user input, so report it instead of aborting
							throw new Exception("unexpected token in css selector: " ~ token);
					}
				}
			break;
			case State.ReadingClass:
				current.attributesIncludesSeparatedBySpaces ~= ["class", token];
				state = State.Starting;
			break;
			case State.ReadingId:
				current.attributesEqual ~= ["id", token];
				state = State.Starting;
			break;
			case State.ReadingPseudoClass:
				switch(token) {
					case "first-of-type":
						current.firstOfType = true;
					break;
					case "last-of-type":
						current.lastOfType = true;
					break;
					case "only-of-type":
						current.firstOfType = true;
						current.lastOfType = true;
					break;
					case "first-child":
						current.firstChild = true;
					break;
					case "last-child":
						current.lastChild = true;
					break;
					case "only-child":
						current.firstChild = true;
						current.lastChild = true;
					break;
					case "empty":
						// one with no children
						current.emptyElement = true;
					break;
					case "whitespace-only":
						current.whitespaceOnly = true;
					break;
					case "link":
						current.attributesPresent ~= "href";
					break;
					case "root":
						current.rootElement = true;
					break;
					case "nth-child":
						current.nthChild ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-of-type":
						current.nthOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-last-of-type":
						current.nthLastOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "not":
						state = State.SkippingFunctionalSelector;
						current.notSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "has":
						state = State.SkippingFunctionalSelector;
						current.hasSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					// back to standards though not quite right lol
					case "disabled":
						current.attributesPresent ~= "disabled";
					break;
					case "checked":
						current.attributesPresent ~= "checked";
					break;

					case "visited", "active", "hover", "target", "focus", "selected":
						// these depend on browser state this library does not track;
						// requiring placeholder attributes makes them match nothing in practice
						current.attributesPresent ~= "nothing";
						// FIXME
						// extensions not implemented:
						//case "text": // takes the text in the element and wraps it in an element, returning it
						goto case;
					case "before", "after":
						current.attributesPresent ~= "FIXME";

					break;
					// My extensions
					case "odd-child":
						current.oddChild = true;
					break;
					case "even-child":
						current.evenChild = true;
					break;
					default:
						// unknown pseudo-classes are silently ignored
						//if(token.indexOf("lang") == -1)
						//assert(0, token);
					break;
				}
				state = State.Starting;
			break;
			case State.SkippingFunctionalSelector:
				if(token == "(") {
					parensCount++;
				} else if(token == ")") {
					parensCount--;
				}

				if(parensCount == 0)
					state = State.Starting;
			break;
			case State.ReadingAttributeSelector:
				attributeName = token;
				attributeComparison = null;
				attributeValue = null;
				state = State.ReadingAttributeComparison;
			break;
			case State.ReadingAttributeComparison:
				// FIXME: these things really should be quotable in the proper lexer...
				if(token != "]") {
					if(token.indexOf("=") == -1) {
						// no comparison operator seen yet, so this token is still
						// part of the attribute name (e.g. the ":" and "lang" of
						// xml:lang). It used to be appended to the value, which is
						// later overwritten or ignored, truncating the name.
						// Whitespace is not part of a name, so it is dropped.
						if(token != " ")
							attributeName ~= token;
					} else {
						attributeComparison = token;
						state = State.ReadingAttributeValue;
					}
					break;
				}
				goto case;
			case State.ExpectingAttributeCloser:
				if(token != "]") {
					// not the closer; consider it part of comparison
					if(attributeComparison == "")
						attributeName ~= token;
					else
						attributeValue ~= token;
					break;
				}

				// Selector operators
				switch(attributeComparison) {
					default:
						throw new Exception("unsupported attribute comparison in css selector: " ~ attributeComparison);
					case "":
						current.attributesPresent ~= attributeName;
					break;
					case "=":
						current.attributesEqual ~= [attributeName, attributeValue];
					break;
					case "|=":
						current.attributesIncludesSeparatedByDashes ~= [attributeName, attributeValue];
					break;
					case "~=":
						current.attributesIncludesSeparatedBySpaces ~= [attributeName, attributeValue];
					break;
					case "$=":
						current.attributesEndsWith ~= [attributeName, attributeValue];
					break;
					case "^=":
						current.attributesStartsWith ~= [attributeName, attributeValue];
					break;
					case "*=":
						current.attributesInclude ~= [attributeName, attributeValue];
					break;
					case "!=":
						current.attributesNotEqual ~= [attributeName, attributeValue];
					break;
				}

				state = State.Starting;
			break;
			case State.ReadingAttributeValue:
				attributeValue = token;
				state = State.ExpectingAttributeCloser;
			break;
		}
	}

	commit();

	return s;
}

/// Returns the elements of `input` in their original order, keeping only the first occurrence of each.
Element[] removeDuplicates(Element[] input) {
	Element[] ret;

	bool[Element] already;
	foreach(e; input) {
		if(e in already) continue;
		already[e] = true;
		ret ~= e;
	}

	return ret;
}

// done with CSS selector handling


// FIXME: use the better parser from html.d
/// This is probably not useful to you unless you're writing a browser or something like that.
/// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes.
/// From here, you can start to make a layout engine for the box model and have a css aware browser.
class CssStyle {
	/++
		Builds the style from a rule (selector text, empty for an inline
		style) and the declaration text that goes between the braces.

		Declarations without a colon are skipped. `!important` is currently
		stripped from values. Shorthand properties given here are expanded
		into their per-side properties afterwards.
	+/
	this(string rule, string content) {
		rule = rule.strip();
		content = content.strip();

		if(content.length == 0)
			return;

		originatingRule = rule;
		originatingSpecificity = getSpecificityOfRule(rule); // FIXME: if there's commas, this won't actually work!

		foreach(part; content.split(";")) {
			part = part.strip();
			if(part.length == 0)
				continue;
			auto idx = part.indexOf(":");
			if(idx == -1)
				continue;
				//throw new Exception("Bad css rule (no colon): " ~ part);

			Property p;

			p.name = part[0 .. idx].strip();
			p.value = part[idx + 1 .. $].replace("! important", "!important").replace("!important", "").strip(); // FIXME don't drop important
			p.givenExplicitly = true;
			p.specificity = originatingSpecificity;

			properties ~= p;
		}

		// foreach iterates the slice as it is right now, so the properties
		// appended by the expansion are not visited again
		foreach(property; properties)
			expandShortForm(property, originatingSpecificity);
	}

	/// Computes the specificity of a rule. Not implemented yet: the result is always zero.
	Specificity getSpecificityOfRule(string rule) {
		Specificity s;
		if(rule.length == 0) { // inline
		//	s.important = 2;
		} else {
			// FIXME
		}

		return s;
	}

	string originatingRule; ///.
	Specificity originatingSpecificity; ///.

	/// The four counters overlaid on a single integer so they can be compared in one go.
	union Specificity {
		uint score; ///.
		// version(little_endian)
		///.
		struct {
			ubyte tags; ///.
			ubyte classes; ///.
			ubyte ids; ///.
			ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important
		}
	}

	///.
	struct Property {
		bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left"
		string name; ///.
		string value; ///.
		Specificity specificity; ///.
		// do we care about the original source rule?
	}

	///.
	Property[] properties;

	/// Property access by camelCase name: with no argument it reads, with an argument it writes using inline specificity.
	string opDispatch(string nameGiven)(string value = null) if(nameGiven != "popFront") {
		string name = unCamelCase(nameGiven);
		if(value is null)
			return getValue(name);

		// setValue takes a Specificity, so build one explicitly rather than
		// passing the bare integer
		Specificity inlineSpecificity;
		inlineSpecificity.score = 0x02000000; /* inline specificity */
		return setValue(name, value, inlineSpecificity);
	}

	/// takes dash style name; returns null if the property is not set
	string getValue(string name) {
		foreach(property; properties)
			if(property.name == name)
				return property.value;
		return null;
	}

	/++
		Sets a property, takes dash style name.

		An existing property is only replaced when `newSpecificity` is at
		least as high as the one it was stored with. `explicit` should be
		false for values inferred from a shorthand. Shorthands are expanded
		after the value has been stored.

		Returns: `value` with any `!important` marker removed.
	+/
	string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) {
		value = value.replace("! important", "!important");
		if(value.indexOf("!important") != -1) {
			newSpecificity.important = 1; // FIXME
			value = value.replace("!important", "").strip();
		}

		foreach(ref property; properties)
			if(property.name == name) {
				if(newSpecificity.score < property.specificity.score)
					return value; // do nothing - the specificity is too low

				property.givenExplicitly = explicit;
				property.value = value;
				// remember what won, so something weaker cannot replace it later
				property.specificity = newSpecificity;

				// expand from a copy taken after the update: the expansion must
				// see the new value, and it may append to `properties`, which
				// can move the array out from under `property`
				Property updated = property;
				expandShortForm(updated, newSpecificity);
				return value;
			}

		// it's not here...

		Property p;
		p.givenExplicitly = explicit;
		p.name = name;
		p.value = value;
		p.specificity = newSpecificity;

		properties ~= p;
		expandShortForm(p, newSpecificity);

		return value;
	}

	// Expands a 1 to 4 value box shorthand into -top, -right, -bottom, -left.
	// Any other number of values is not a valid shorthand and is ignored.
	private void expandQuadShort(string name, string value, Specificity specificity) {
		// split on runs of whitespace so repeated spaces don't produce empty parts
		auto parts = value.split();
		switch(parts.length) {
			case 1: // all four sides
				setValue(name ~"-left", parts[0], specificity, false);
				setValue(name ~"-right", parts[0], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 2: // vertical, horizontal
				setValue(name ~"-left", parts[1], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 3: // top, horizontal, bottom
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				// left mirrors right in the three value form
				setValue(name ~"-left", parts[1], specificity, false);
			break;
			case 4: // top, right, bottom, left
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[3], specificity, false);
			break;
			default:
				// stylesheet content is arbitrary text; don't abort over a bad declaration
			break;
		}
	}

	/// If `p` is a shorthand this knows about, sets the per-side properties it implies (marked as not given explicitly).
	void expandShortForm(Property p, Specificity specificity) {
		switch(p.name) {
			case "margin":
			case "padding":
				expandQuadShort(p.name, p.value, specificity);
			break;
			case "border":
			case "outline":
				setValue(p.name ~ "-left", p.value, specificity, false);
				setValue(p.name ~ "-right", p.value, specificity, false);
				setValue(p.name ~ "-top", p.value, specificity, false);
				setValue(p.name ~ "-bottom", p.value, specificity, false);
			break;

			// the per-side shorthands are not expanded any further yet
			case "border-top":
			case "border-bottom":
			case "border-left":
			case "border-right":
			case "outline-top":
			case "outline-bottom":
			case "outline-left":
			case "outline-right":

			default: {}
		}
	}

	/// Renders the explicitly given properties: as a full rule block when there is an originating rule, otherwise as inline declarations.
	override string toString() {
		string ret;
		if(originatingRule.length)
			ret = originatingRule ~ " {";

		foreach(property; properties) {
			if(!property.givenExplicitly)
				continue; // skip the inferred ones

			if(originatingRule.length)
				ret ~= "\n\t";
			else
				ret ~= " ";

			ret ~= property.name ~ ": " ~ property.value ~ ";";
		}

		if(originatingRule.length)
			ret ~= "\n}\n";

		return ret;
	}
}

/// Wraps `url` as a css `url("...")` value. The url is inserted as is, without escaping.
string cssUrl(string url) {
	return "url(\"" ~ url ~ "\")";
}
/// This probably isn't useful, unless you're writing a browser or something like that.
/// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css
/// as text.
///
/// The idea, however, is to represent a kind of CSS object model, complete with specificity,
/// that you can apply to your documents to build the complete computedStyle object.
class StyleSheet {
	/// The rules found in the source, in source order.
	CssStyle[] rules;

	/++
		Parses css source text into `rules`.

		This is a character level state machine. Comments are dropped and
		`@` rules are skipped rather than interpreted.
	+/
	this(string source) {
		// FIXME: handle @ rules and probably could improve lexer
		// add nesting?
		int state;
		string currentRule;
		string currentValue;

		// points at whichever of the two strings is being filled right now
		string* currentThing = &currentRule;
		foreach(c; source) {
			switch(state) {
				default: assert(0);
				case 0: // starting - we assume we're reading a rule
					switch(c) {
						case '@':
							state = 4;
						break;
						case '/':
							state = 1;
						break;
						case '{':
							currentThing = &currentValue;
						break;
						case '}':
							if(currentThing is &currentValue) {
								rules ~= new CssStyle(currentRule, currentValue);

								currentRule = "";
								currentValue = "";

								currentThing = &currentRule;
							} else {
								// a closing brace without an opening one; seen
								// e.g. after the nested block of a skipped @ rule.
								// check sveit.com to reproduce
								currentRule = "";
								currentValue = "";
							}
						break;
						default:
							(*currentThing) ~= c;
						break;
					}
				break;
				case 1: // saw '/', expecting * to open a comment
					if(c == '*')
						state = 2;
					else {
						// it was an ordinary slash after all
						state = 0;
						(*currentThing) ~= "/" ~ c;
					}
				break;
				case 2: // inside comment
					if(c == '*')
						state = 3;
				break;
				case 3: // saw '*' inside a comment, expecting / to end it
					if(c == '/')
						state = 0;
					else if(c != '*')
						state = 2; // it's just a comment so no need to append
					// another '*' may still be followed by the closing '/', as
					// in "**/", so stay in this state for it
				break;
				case 4: // inside an @ rule prelude
					if(c == '{')
						state = 5;
					if(c == ';')
						state = 0; // just skipping import
				break;
				case 5: // inside the block of an @ rule
					if(c == '}')
						state = 0; // skipping font face probably
				break;
			}
		}
	}

	/// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call
	void apply(Document document) {
		foreach(rule; rules) {
			if(rule.originatingRule.length == 0)
				continue; // this shouldn't happen here in a stylesheet
			foreach(element; document.querySelectorAll(rule.originatingRule)) {
				// note: this should be a different object than the inline style
				// since givenExplicitly is likely destroyed here
				auto current = element.computedStyle;

				foreach(item; rule.properties)
					current.setValue(item.name, item.value, item.specificity);
			}
		}
	}
}


/// This is kinda private; just a little utility container for use by the ElementStream class.
final class Stack(T) {
	/// Starts out empty, backed by the buffer embedded in the object.
	this() {
		internalLength = 0;
		arr = initialBuffer[];
	}

	/// Adds `t` on top, moving to a bigger heap array when the current storage is full.
	void push(T t) {
		if(internalLength >= arr.length) {
			auto oldarr = arr;
			// double while small, then grow in fixed steps of 4096
			if(arr.length < 4096)
				arr = new T[arr.length * 2];
			else
				arr = new T[arr.length + 4096];
			arr[0 .. oldarr.length] = oldarr[];
		}

		arr[internalLength] = t;
		internalLength++;
	}

	/// Removes and returns the top item. The stack must not be empty.
	T pop() {
		assert(internalLength);
		internalLength--;
		return arr[internalLength];
	}

	/// Returns the top item without removing it. The stack must not be empty.
	T peek() {
		assert(internalLength);
		return arr[internalLength - 1];
	}

	/// True when there is nothing on the stack.
	@property bool empty() {
		return internalLength ? false : true;
	}

	///.
	private T[] arr;
	private size_t internalLength;
	private T[64] initialBuffer;
	// the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep),
	// using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push()
	// function thanks to this, and push() was actually one of the slowest individual functions in the code!
}

/// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively.
final class ElementStream {

	/// The element the walk is currently at.
	@property Element front() {
		return current.element;
	}

	/// Use Element.tree instead.
	this(Element start) {
		current.element = start;
		current.childPosition = -1; // -1: none of its children have been visited yet
		isEmpty = false;
		stack = new Stack!(Current);
	}

	/*
		Handle it
		handle its children

	*/

	/// Moves on to the next element: the first unvisited child if there is one, otherwise back up through the ancestors until one has an unvisited child.
	void popFront() {
	    more:
	    	if(isEmpty) return;

		// FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times)

		current.childPosition++;
		if(current.childPosition >= current.element.children.length) {
			// out of children here: go back up, or finish if this was the start
			if(stack.empty())
				isEmpty = true;
			else {
				current = stack.pop();
				goto more;
			}
		} else {
			// descend into the next child, remembering where we were
			stack.push(current);
			current.element = current.element.children[current.childPosition];
			current.childPosition = -1;
		}
	}

	/// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable.
	void currentKilled() {
		if(stack.empty) // should never happen
			isEmpty = true;
		else {
			current = stack.pop();
			current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right
		}
	}

	/// True once the whole tree has been walked.
	@property bool empty() {
		return isEmpty;
	}

	private:

	// a position in the walk: an element and the index of the child last entered
	struct Current {
		Element element;
		int childPosition;
	}

	Current current;

	// the ancestors of `current`, each with its own child position
	Stack!(Current) stack;

	bool isEmpty;
}
/// Index of the first occurrence of `needle` in `haystack`, or -1 when the search comes back empty.
sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) {
	static import std.algorithm;
	auto remainder = std.algorithm.find(haystack, needle);
	// find hands back the part of haystack that starts at the match
	return remainder.length == 0
		? -1
		: haystack.length - remainder.length;
}

// Returns a new array holding arr with `what` spliced in right after index `position`.
private T[] insertAfter(T)(T[] arr, int position, T[] what) {
	assert(position < arr.length);
	auto head = arr[0 .. position + 1];
	auto tail = arr[position + 1 .. $];
	return head ~ what ~ tail;
}

// True if some entry of arr compares equal to item.
package bool isInArray(T)(T item, T[] arr) {
	for(size_t idx = 0; idx < arr.length; idx++) {
		if(item == arr[idx])
			return true;
	}
	return false;
}

// Copies every key/value pair of arr into a fresh associative array.
private string[string] aadup(in string[string] arr) {
	string[string] copy;
	foreach(key, value; arr) {
		copy[key] = value;
	}
	return copy;
}

// dom event support, if you want to use it

/// used for DOM events
alias EventHandler = void delegate(Element handlerAttachedTo, Event event);

/// This is a DOM event, like in javascript. Note that this library never fires events - it is only here for you to use if you want it.
class Event {
	/// Creates an event called `eventName` aimed at `target`.
	this(string eventName, Element target) {
		this.eventName = eventName;
		this.srcElement = target;
	}

	/// Prevents the default event handler (if there is one) from being called
	void preventDefault() {
		defaultPrevented = true;
	}

	/// Stops the event propagation immediately.
	void stopPropagation() {
		propagationStopped = true;
	}

	bool defaultPrevented;
	bool propagationStopped;
	string eventName;

	Element srcElement;
	alias srcElement target;

	Element relatedTarget;

	int clientX;
	int clientY;

	int button;

	bool isBubbling;

	/// this sends it only to the target. If you want propagation, use dispatch() instead.
	void send() {
		auto e = srcElement;
		if(e is null)
			return;

		if(eventName in e.bubblingEventHandlers) {
			foreach(handler; e.bubblingEventHandlers[eventName])
				handler(e, this);
		}

		if(defaultPrevented)
			return;

		if(eventName in e.defaultEventHandlers)
			e.defaultEventHandlers[eventName](e, this);
	}

	/// this dispatches the element using the capture -> target -> bubble process
	void dispatch() {
		if(srcElement is null)
			return;

		// the target followed by each of its ancestors, innermost first
		Element[] chain;
		for(Element node = srcElement; node !is null; node = node.parentNode)
			chain ~= node;

		// capture phase: outermost ancestor down to the target
		isBubbling = false;
		foreach_reverse(e; chain) {
			if(eventName in e.capturingEventHandlers) {
				foreach(handler; e.capturingEventHandlers[eventName])
					handler(e, this);
			}

			// the default on capture should really be to always do nothing

			if(propagationStopped)
				break;
		}

		// bubble phase: target back up to the outermost ancestor
		isBubbling = true;
		if(!propagationStopped) {
			foreach(e; chain) {
				if(eventName in e.bubblingEventHandlers) {
					foreach(handler; e.bubblingEventHandlers[eventName])
						handler(e, this);
				}

				if(propagationStopped)
					break;
			}
		}

		// default handlers run along the whole chain unless prevented
		if(!defaultPrevented) {
			foreach(e; chain) {
				if(eventName in e.defaultEventHandlers)
					e.defaultEventHandlers[eventName](e, this);
			}
		}
	}
}

/// Validation related options of a form field, convertible to and from element attributes.
struct FormFieldOptions {
	// usable for any

	/// this is a regex pattern used to validate the field
	string pattern;
	/// must the field be filled in? Even with a regex, it can be submitted blank if this is false.
	bool isRequired;
	/// this is displayed as an example to the user
	string placeholder;

	// usable for numeric ones


	// convenience methods to quickly get some options

	/// Options with nothing set.
	@property static FormFieldOptions none() {
		FormFieldOptions blank;
		return blank;
	}

	/// Options for a field that must be filled in.
	static FormFieldOptions required() {
		FormFieldOptions opts;
		opts.isRequired = true;
		return opts;
	}

	/// Options for a field validated by `pattern`, optionally required as well.
	static FormFieldOptions regex(string pattern, bool required = false) {
		FormFieldOptions opts;
		opts.isRequired = required;
		opts.pattern = pattern;
		return opts;
	}

	/// Reads the required, pattern and placeholder attributes off `e`.
	static FormFieldOptions fromElement(Element e) {
		FormFieldOptions opts;
		if(e.hasAttribute("required")) {
			opts.isRequired = true;
		}
		if(e.hasAttribute("pattern")) {
			opts.pattern = e.pattern;
		}
		if(e.hasAttribute("placeholder")) {
			opts.placeholder = e.placeholder;
		}
		return opts;
	}

	/// Writes the options that are set onto `e` as attributes and returns `e`.
	Element applyToElement(Element e) {
		if(isRequired) {
			e.required = "required";
		}
		if(pattern.length) {
			e.pattern = pattern;
		}
		if(placeholder.length) {
			e.placeholder = placeholder;
		}
		return e;
	}
}
stdout.flush(); 8573 } 8574 8575 @property final size_t length() { 8576 // the parser checks length primarily directly before accessing the next character 8577 // so this is the place we'll hook to append more if possible and needed. 8578 if(lastIdx + 1 >= data.length && hasMore()) { 8579 data ~= getMore(); 8580 } 8581 return data.length; 8582 } 8583 8584 final char opIndex(size_t idx) { 8585 if(idx > lastIdx) 8586 lastIdx = idx; 8587 return data[idx]; 8588 } 8589 8590 final string opSlice(size_t start, size_t end) { 8591 if(end > lastIdx) 8592 lastIdx = end; 8593 return data[start .. end]; 8594 } 8595 8596 final size_t opDollar() { 8597 return length(); 8598 } 8599 8600 final Utf8Stream opBinary(string op : "~")(string s) { 8601 this.data ~= s; 8602 return this; 8603 } 8604 8605 final Utf8Stream opOpAssign(string op : "~")(string s) { 8606 this.data ~= s; 8607 return this; 8608 } 8609 8610 final Utf8Stream opAssign(string rhs) { 8611 this.data = rhs; 8612 return this; 8613 } 8614 private: 8615 string data; 8616 8617 size_t lastIdx; 8618 8619 bool delegate() hasMoreHelper; 8620 string delegate() getMoreHelper; 8621 8622 8623 /+ 8624 // used to maybe clear some old stuff 8625 // you might have to remove elements parsed with it too since they can hold slices into the 8626 // old stuff, preventing gc 8627 void dropFront(int bytes) { 8628 posAdjustment += bytes; 8629 data = data[bytes .. $]; 8630 } 8631 8632 int posAdjustment; 8633 +/ 8634 } 8635 8636 void fillForm(T)(Form form, T obj, string name) { 8637 import arsd.database; 8638 fillData((k, v) => form.setValue(k, v), obj, name); 8639 } 8640 8641 8642 /+ 8643 /+ 8644 Syntax: 8645 8646 Tag: tagname#id.class 8647 Tree: Tag(Children, comma, separated...) 8648 Children: Tee or Variable 8649 Variable: $varname with optional |funcname following. 
8650 8651 If a variable has a tree after it, it breaks the variable down: 8652 * if array, foreach it does the tree 8653 * if struct, it breaks down the member variables 8654 8655 stolen from georgy on irc, see: https://github.com/georgy7/stringplate 8656 +/ 8657 struct Stringplate { 8658 /++ 8659 8660 +/ 8661 this(string s) { 8662 8663 } 8664 8665 /++ 8666 8667 +/ 8668 Element expand(T...)(T vars) { 8669 return null; 8670 } 8671 } 8672 /// 8673 unittest { 8674 auto stringplate = Stringplate("#bar(.foo($foo), .baz($baz))"); 8675 assert(stringplate.expand.innerHTML == `<div id="bar"><div class="foo">$foo</div><div class="baz">$baz</div></div>`); 8676 } 8677 +/ 8678 8679 bool allAreInlineHtml(const(Element)[] children) { 8680 foreach(child; children) { 8681 if(child.nodeType == NodeType.Text && child.nodeValue.strip.length) { 8682 // cool 8683 } else if(child.tagName.isInArray(inlineElements) && allAreInlineHtml(child.children)) { 8684 // cool 8685 } else { 8686 // prolly block 8687 return false; 8688 } 8689 } 8690 return true; 8691 } 8692 8693 private bool isSimpleWhite(dchar c) { 8694 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; 8695 } 8696 8697 unittest { 8698 // Test for issue #120 8699 string s = `<html> 8700 <body> 8701 <P>AN 8702 <P>bubbles</P> 8703 <P>giggles</P> 8704 </body> 8705 </html>`; 8706 auto doc = new Document(); 8707 doc.parseUtf8(s, false, false); 8708 auto s2 = doc.toString(); 8709 assert( 8710 s2.indexOf("bubbles") < s2.indexOf("giggles"), 8711 "paragraph order incorrect:\n" ~ s2); 8712 } 8713 8714 unittest { 8715 // test for suncarpet email dec 24 2019 8716 // arbitrary id asduiwh 8717 auto document = new Document("<html> 8718 <head> 8719 <meta charset=\"utf-8\"></meta> 8720 <title>Element.querySelector Test</title> 8721 </head> 8722 <body> 8723 <div id=\"foo\"> 8724 <div>Foo</div> 8725 <div>Bar</div> 8726 </div> 8727 </body> 8728 </html>"); 8729 8730 auto doc = document; 8731 8732 assert(doc.querySelectorAll("div div").length == 2); 
8733 assert(doc.querySelector("div").querySelectorAll("div").length == 2); 8734 assert(doc.querySelectorAll("> html").length == 0); 8735 assert(doc.querySelector("head").querySelectorAll("> title").length == 1); 8736 assert(doc.querySelector("head").querySelectorAll("> meta[charset]").length == 1); 8737 8738 8739 assert(doc.root.matches("html")); 8740 assert(!doc.root.matches("nothtml")); 8741 assert(doc.querySelector("#foo > div").matches("div")); 8742 assert(doc.querySelector("body > #foo").matches("#foo")); 8743 8744 assert(doc.root.querySelectorAll(":root > body").length == 0); // the root has no CHILD root! 8745 assert(doc.querySelectorAll(":root > body").length == 1); // but the DOCUMENT does 8746 assert(doc.querySelectorAll(" > body").length == 1); // should mean the same thing 8747 assert(doc.root.querySelectorAll(" > body").length == 1); // the root of HTML has this 8748 assert(doc.root.querySelectorAll(" > html").length == 0); // but not this 8749 } 8750 8751 unittest { 8752 // based on https://developer.mozilla.org/en-US/docs/Web/API/Element/closest example 8753 auto document = new Document(`<article> 8754 <div id="div-01">Here is div-01 8755 <div id="div-02">Here is div-02 8756 <div id="div-03">Here is div-03</div> 8757 </div> 8758 </div> 8759 </article>`, true, true); 8760 8761 auto el = document.getElementById("div-03"); 8762 assert(el.closest("#div-02").id == "div-02"); 8763 assert(el.closest("div div").id == "div-03"); 8764 assert(el.closest("article > div").id == "div-01"); 8765 assert(el.closest(":not(div)").tagName == "article"); 8766 8767 assert(el.closest("p") is null); 8768 assert(el.closest("p, div") is el); 8769 } 8770 8771 /* 8772 Copyright: Adam D. Ruppe, 2010 - 2020 8773 License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. 8774 Authors: Adam D. Ruppe, with contributions by Nick Sabalausky, Trass3r, and ketmar among others 8775 8776 Copyright Adam D. Ruppe 2010-2020. 
8777 Distributed under the Boost Software License, Version 1.0. 8778 (See accompanying file LICENSE_1_0.txt or copy at 8779 http://www.boost.org/LICENSE_1_0.txt) 8780 */ 8781 8782