// FIXME: add +proj and -proj to adjust project results

// dmdi -g -debug -version=vps locate stemmer.d -oflocate_vps -version=scgi

// my local config assumes this will be on port 9653

module adrdox.locate;

import arsd.postgres;

import ps = PorterStemmer;
import arsd.cgi;
import arsd.dom;
import std.stdio;
import std.file;
import std.conv : to;
import std.algorithm : sort;
import std.string : toLower, replace, split;

// Lazily created connection; access it through db().
PostgreSql db_;

/++
	Returns the shared database connection, opening it against
	"dbname=adrdox" on first use.
+/
PostgreSql db() {
	if(db_ is null) {
		db_ = new PostgreSql("dbname=adrdox");
	}
	return db_;
}

/++
	Looks up symbols matching a single search term.

	Two sources are consulted, and a symbol may appear once per source:

	$(LIST
		* hand written tags matching `term` (score comes from the table)
		* symbols of the latest package versions whose fully qualified name,
		  plain name, or name-with-module-prefix-removed equals `term`
		  (50 points for an exact fully qualified match, 25 otherwise)
	)

	Params:
		term = one whitespace-separated word of the user's query

	Returns:
		the matching symbol ids with their per-term scores.
+/
TermElement[] resultsByTerm(string term) {
	TermElement[] ret;

	// NOTE(review): ILIKE treats % and _ in `term` as wildcards; confirm
	// whether that is wanted for user supplied terms.
	foreach(row; db.query("SELECT d_symbols.id, score FROM hand_written_tags INNER JOIN d_symbols ON d_symbol_fully_qualified_name = fully_qualified_name WHERE tag ILIKE ? ORDER BY score DESC", term))
		ret ~= TermElement(to!int(row[0]), to!int(row[1]));

	foreach(row; db.query("
		SELECT
			d_symbols.id, fully_qualified_name
		FROM
			d_symbols
		INNER JOIN
			package_version ON package_version_id = package_version.id
		WHERE
			is_latest = true
			AND
			(
				fully_qualified_name = ?
				OR
				name = ?
				OR
				substring(fully_qualified_name, length(module_name) + 2) = ?
			)
	", term, term, term)) {
		// an exact fully qualified hit is worth twice a partial name hit
		ret ~= TermElement(to!int(row[0]), row[1] == term ? 50 : 25);
	}

	// auto generated tags are currently disabled
	version(none)
	foreach(row; db.query("
		SELECT
			d_symbols_id, score
		FROM
			auto_generated_tags
		INNER JOIN
			package_version ON package_version_id = package_version.id
		WHERE
			tag ILIKE ?
			AND
			is_latest = true
		ORDER BY
			score + (case (dub_package_id = 6 or dub_package_id = 9) when true then 5 else 0 end) DESC
	", term))
		ret ~= TermElement(to!int(row[0]), to!int(row[1]));

	return ret;
}

/++
	Loads the display details for the symbol with id `i`, restricted to the
	latest package versions.

	The `type` and `parent` members of the result are always `""` and `0`;
	this query does not fill them in.

	Returns:
		the symbol's details, or `DeclElement.init` (id 0) if no row matched.
+/
DeclElement getDecl(int i) {
	foreach(row; db.query("
		SELECT
			d_symbols.*,
			dub_package.url_name AS package_subdomain
		FROM
			d_symbols
		INNER JOIN
			package_version ON package_version.id = d_symbols.package_version_id
		INNER JOIN
			dub_package ON dub_package.id = package_version.dub_package_id
		WHERE
			d_symbols.id = ?
			AND
			is_latest = true
	", i)) {
		return DeclElement(row["fully_qualified_name"], row["summary"], row["url_name"], row["id"].to!int, "", 0, row["package_subdomain"]);
	}
	return DeclElement.init;
}

/// A symbol id together with the score one search term gave it.
static struct TermElement {
	int declId;
	int score;
}

/// Display details of one symbol, as produced by [getDecl].
static struct DeclElement {
	string name;
	string description; // actually HTML
	string link;
	int id;
	string type;
	int parent;
	string packageName;
}

/// One candidate search result: symbol id, total score, and its details.
static struct Magic {
	int declId;
	int score;
	DeclElement decl;
}

/++
	Computes the score bonus a result gets from the package it lives in.

	Params:
		details = the candidate symbol
		preferredProject = package name requested by the user, or empty

	Returns:
		150 if the symbol is in the preferred project, plus 50 for
		phobos/druntime, plus 30 for arsd-official.
+/
int getProjectAdjustment(DeclElement details, string preferredProject) {
	int projectAdjustment;
	if(preferredProject.length) {
		if(preferredProject == details.packageName)
			projectAdjustment = 150;
	}
	if(details.packageName == "phobos" || details.packageName == "druntime")
		projectAdjustment += 50;
	if(details.packageName == "arsd-official")
		projectAdjustment += 30;

	return projectAdjustment;
}

/++
	Runs the search and returns every candidate with its (unsorted) score.

	Each space-separated term is looked up with [resultsByTerm]. A symbol's
	score is the sum of its per-term scores (a symbol first seen on a later
	term only gets half of that term's score), halved once for every term
	it did not match, plus [getProjectAdjustment]. If nothing matched at
	all, up to 50 symbols per term whose fully qualified name compares
	greater than the term are returned as a fallback.

	Params:
		search = the raw query text
		preferredProject = package to favor, or empty

	Returns:
		the candidates; callers are expected to sort them.
+/
Magic[] getPossibilities(string search, string preferredProject) {
	import std.algorithm : filter;
	import std.array : array;
	import std.string : strip;

	// The per-declaration hit mask below is an int with one bit per term,
	// so the term index must stay a valid shift count. Capping also bounds
	// the number of database queries one request can trigger.
	enum maxTerms = 31;

	int[int] declScores;

	int[int] declHits;

	// ps.PorterStemmer s;

	auto terms = search.split(" ");// ~ search.split(".");

	// Drop blank terms while keeping the remaining ones in their original
	// order: the first term is scored differently from the rest, so the
	// order the user typed them in matters.
	terms = terms.filter!(t => t.strip.length != 0).array;
	if(terms.length > maxTerms)
		terms = terms[0 .. maxTerms];

	void addHit(TermElement item, size_t idx) {
		if(idx == 0) {
			declScores[item.declId] += item.score;
			return;
		}
		if(item.declId in declScores) {
			//declScores[item.declId] += 25; // hit both terms
			declScores[item.declId] += item.score;
		} else {
			// only hit one term...
			declScores[item.declId] += item.score / 2;
		}
	}

	// On each term, we want to check for exact match and fuzzy match / natural language match.
	// FIXME: if something matches both it should be really strong. see time_t vs "time_t std.datetime"
	foreach(idx, term; terms) {
		assert(term.length > 0);

		foreach(item; resultsByTerm(term)) {
			addHit(item, idx);
			declHits[item.declId] |= 1 << idx;
		}
		/+
		auto st = s.stem(term.toLower).idup;
		if(st != l)
		foreach(item; resultsByTerm(st)) {
			addHit(item, idx);
			declHits[item.declId] |= 1 << idx;
		}
		+/
	}

	Magic[] magic;

	string[string] fqns;

	foreach(decl, score; declScores) {
		auto hits = declHits[decl];
		// halve the score once for every term this symbol missed
		foreach(idx, term; terms) {
			if(!(hits & (1 << idx)))
				score /= 2;
		}
		auto details = getDecl(decl);
		/+
		if(details.name in fqns)
			continue;
		fqns[details.name] = details.name;
		+/
		int projectAdjustment = getProjectAdjustment(details, preferredProject);
		magic ~= Magic(decl, score + projectAdjustment, details);
	}

	if(magic.length == 0) {
		// Fallback when no term matched anything.
		// NOTE(review): the query has no ORDER BY, so which 50 rows come
		// back is up to the database - confirm whether ordering by
		// fully_qualified_name was intended.
		foreach(term; terms) {
			if(term.length == 0) continue;
			//term = term.toLower();
			foreach(row; db.query("SELECT id, fully_qualified_name FROM d_symbols WHERE fully_qualified_name > ? LIMIT 50", term)) {
				string name = row[1];
				int id = row[0].to!int;
				/+
				import std.algorithm;
				name = name.toLower;
				auto dist = cast(int) levenshteinDistance(name, term);
				if(dist <= 2) {
				+/
				int dist = 0;
				{
					auto details = getDecl(id);
					int projectAdjustment = getProjectAdjustment(details, preferredProject);
					magic ~= Magic(id, projectAdjustment + (3 - dist), details);
				}
			}
		}
	}

	// boosts based on topography
	foreach(ref item; magic) {
		auto decl = item.decl;
		if(decl.type == "module") {
			// if it is a module, give it moar points
			item.score += 8;
			continue;
		}
		// Smaller boost for direct children of a module. This used to
		// re-fetch the declaration itself (one wasted query per result);
		// the parent is what has to be inspected. Symbols without a
		// recorded parent are skipped without touching the database.
		if(decl.parent != 0 && getDecl(decl.parent).type == "module") {
			item.score += 5;
		}
	}

	return magic;
}

import std.uri;

/++
	CGI entry point: serves the search page.

	The query is taken from the `q` or `searchTerm` request variable, the
	raw query string, or - if all are empty - the request path. A handful
	of keyword prefixes (bugzilla, dip, wiki, faq, ...) redirect to external
	sites; everything else is searched with [getPossibilities] and the top
	20 distinct results are rendered into the skeleton page.

	Under `version(vps)` this also serves the search home page, script.js
	and style.css from /dpldocs-build/. Without it, requests are redirected
	to search.dpldocs.info unless a `local` GET variable is present.
+/
void searcher(Cgi cgi) {

	auto search = cgi.request("q", cgi.request("searchTerm", cgi.queryString));

	// request path with the query string and the leading slash removed
	string path = cgi.requestUri;

	auto q = path.indexOf("?");
	if(q != -1) {
		path = path[0 .. q];
	}

	if(path.length && path[0] == '/')
		path = path[1 .. $];

	version(vps) {
		if(path.length == 0 && search.length == 0) {
			import std.file;

			cgi.write(std.file.read("/dpldocs-build/search-home.html"), true);
			return;
		}

		if(path == "script.js") {
			import std.file;
			cgi.setResponseContentType("text/javascript");
			cgi.write(std.file.read("/dpldocs-build/script.js"), true);
			return;
		}

		if(path == "style.css") {
			import std.file;
			cgi.setResponseContentType("text/css");
			cgi.write(std.file.read("/dpldocs-build/style.css"), true);
			return;
		}
	}

	alias searchTerm = search;

	// no explicit query: treat the path as the search text
	if(search.length == 0 && path.length)
		search = path;

	if(search.length == 0) {
		cgi.setResponseLocation("/");
		return;
	}

	auto parts = search.split(" ");
	switch(parts[0].toLower()) {
		case "auto-ref-return-function-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#auto-ref-functions");
			return;
		case "auto-function-return-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#auto-functions");
			return;
		case "ref-function-return-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#ref-functions");
			return;
		case "bugzilla":
			auto url = "http://d.puremagic.com/issues/";
			// user input goes into a redirect header: always encode it
			if(parts.length > 1)
				url ~= "show_bug.cgi?id=" ~ std.uri.encodeComponent(parts[1]);
			cgi.setResponseLocation(url);
			return;
		case "dip":
			auto url = "http://wiki.dlang.org/DIPs";
			if(parts.length > 1)
				url = "http://wiki.dlang.org/DIP" ~ std.uri.encodeComponent(parts[1]);
			cgi.setResponseLocation(url);
			return;
		case "wiki":
			auto url = "http://wiki.dlang.org/";
			// the search arguments must be a query string ("?...") and the
			// words are joined with a single space, same as for oldwiki
			if(parts.length > 1)
				url ~= "?search=" ~ std.uri.encodeComponent(join(parts[1..$], " ")) ~ "&go=Go&title=Special%3ASearch";
			cgi.setResponseLocation(url);
			return;
		case "faqs":
		case "faq":
			cgi.setResponseLocation("http://wiki.dlang.org/FAQs");
			return;
		case "template-alias-parameter":
			cgi.setResponseLocation("https://dlang.org/spec/template.html#aliasparameters");
			return;
		case "is-expression":
			cgi.setResponseLocation("https://dlang.org/spec/expression.html#IsExpression");
			return;
		case "typeof-expression":
			cgi.setResponseLocation("https://dlang.org/spec/declaration.html#Typeof");
			return;
		case "oldwiki":
			auto url = "http://prowiki.org/wiki4d/wiki.cgi";
			if(parts.length > 1)
				url ~= "?formpage=Search&id=Search&search=" ~ std.uri.encodeComponent(join(parts[1..$], " "));
			cgi.setResponseLocation(url);
			return;
		default:
			// just continue
			version(vps) { } else {
				/+
				if(std.file.exists("/var/www/dpldocs.info/experimental-docs/" ~ searchTerm ~ ".1.html")) {
					cgi.setResponseLocation("/experimental-docs/" ~ searchTerm ~ ".1.html");
					return;
				}
				if(std.file.exists("/var/www/dpldocs.info/experimental-docs/" ~ searchTerm ~ ".html")) {
					cgi.setResponseLocation("/experimental-docs/" ~ searchTerm ~ ".html");
					return;
				}
				+/
				// redirect to vps
				if("local" !in cgi.get) {
					cgi.setResponseLocation("//search.dpldocs.info/?q=" ~ std.uri.encodeComponent(searchTerm));
					// the client is being sent elsewhere, so don't run the
					// search and render a page nobody will see
					return;
				}
			}
	}


	Magic[] magic = getPossibilities(search, cgi.request("project"));

	sort!((a, b) => a.score > b.score)(magic);

	// adjustments based on previously showing results
	{
		bool[int] alreadyPresent;
		foreach(ref item; magic) {
			auto decl = item.decl;
			if(decl.parent in alreadyPresent)
				item.score -= 8;
			alreadyPresent[decl.id] = true;
		}
	}

	auto document = new Document();
	version(vps) {
		import std.file;
		document.parseUtf8(readText("/dpldocs-build/skeleton.html"), true, true);
		document.title = "Dub Documentation Search";
	} else
		document.parseUtf8(import("skeleton.html"), true, true);
	// NOTE(review): this assignment is unconditional, so it also replaces
	// the "Dub Documentation Search" title set in the vps branch above.
	// Confirm which title is wanted there.
	document.title = "Search Results";

	auto form = document.requireElementById!Form("search");
	form.setValue("searchTerm", search);

	version(vps) {
		// intentionally blank
	} else {
		auto l = document.requireSelector("link");
		l.href = "/experimental-docs/" ~ l.href;
		l = document.requireSelector("script[src]");
		l.src = "/experimental-docs/" ~ l.src;
	}

	auto pc = document.requireSelector("#page-content");
	pc.addChild("h1", "Search Results");
	auto ml = pc.addChild("dl");
	ml.className = "member-list";

	// Builds the dotted name by walking up the parent chain until a module,
	// a symbol without parent, or a failed lookup is reached.
	string getFqn(DeclElement i) {
		string n;
		while(true) {
			if(n.length) n = "." ~ n;
			n = i.name ~ n;
			if(i.type == "module")
				break;
			if(i.parent == 0)
				break;
			i = getDecl(i.parent);
			if(i.id == 0)
				break;
		}
		return n;
	}

	bool[string] alreadyPresent;
	int count = 0;
	foreach(item; magic) {
		auto decl = item.decl;
		if(decl.id == 0) continue; // should never happen
		version(vps)
			auto link = "//"~decl.packageName~".dpldocs.info/" ~ decl.link;
		else
			auto link = "//dpldocs.info/experimental-docs/" ~ decl.link;
		if(decl.link.length && decl.link[0] == '/')
			link = decl.link;
		auto fqn = getFqn(decl);
		// show each fully qualified name only once
		if(fqn in alreadyPresent)
			continue;
		alreadyPresent[fqn] = true;
		auto dt = ml.addChild("dt");
		dt.addClass("search-result");
		dt.addChild("span", decl.packageName).addClass("project-name");
		dt.addChild("br");
		// zero-width spaces after the dots let long names wrap
		dt.addChild("a", fqn.replace(".", ".\u200B"), link);
		dt.dataset.score = to!string(item.score);
		auto html = decl.description;
		//auto d = new Document(html);
		//writeln(d.root.innerText.replace("\n", " "));
		//writeln();

		// FIXME fix relative links from here
		auto dd = ml.addChild("dd", Html(html));

		// Make the links of this result absolute against its own package's
		// site. Only the two elements just added are scanned; links of
		// earlier results were already rebased when they were added.
		auto base = Uri("//" ~ decl.packageName~".dpldocs.info/");
		foreach(section; [dt, dd]) {
			foreach(a; section.querySelectorAll("a[href]")) {
				auto uri = Uri(a.href).basedOn(base);
				a.href = uri.toString;
			}
		}
		count++;

		if(count >= 20)
			break;
	}

	cgi.write(document.toString, true);
}

mixin GenericMain!(searcher);