// FIXME: add +proj and -proj to adjust project results

// dmdi -g -debug -version=vps locate stemmer.d -oflocate_vps -version=scgi

// my local config assumes this will be on port 9653

module adrdox.locate;

import arsd.postgres;

import ps = PorterStemmer;
import arsd.cgi;
import arsd.dom;
import std.stdio;
import std.file;
import std.conv : to;
import std.algorithm : sort;
import std.string : toLower, replace, split;

/// Lazily created database connection; use [db] to access it.
PostgreSql db_;

/++
	Returns the shared connection to the `adrdox` database, opening it on
	first use.
+/
PostgreSql db() {
	if(db_ is null) {
		db_ = new PostgreSql("dbname=adrdox");
	}
	return db_;
}

/++
	Looks up the symbols tagged with `term` in `auto_generated_tags`.

	Returns:
		up to 15 (symbol id, score) pairs, highest score first.
+/
TermElement[] resultsByTerm(string term) {
	TermElement[] ret;
	foreach(row; db.query("SELECT d_symbols_id, score FROM auto_generated_tags WHERE tag = ? ORDER BY score DESC LIMIT 15", term))
		ret ~= TermElement(to!int(row[0]), to!int(row[1]));
	return ret;
}

/++
	Loads the symbol with id `i`, joined with the url name of its package,
	restricted to rows where `is_latest` is true.

	Note that `type` is always left as "" and `parent` as 0 by this function.

	Returns:
		the first matching row, or `DeclElement.init` (whose `id` is 0) when
		the query yields no rows.
+/
DeclElement getDecl(int i) {
	foreach(row; db.query("
		SELECT
			d_symbols.*,
			dub_package.url_name AS package_subdomain
		FROM
			d_symbols
		INNER JOIN
			package_version ON package_version.id = d_symbols.package_version_id
		INNER JOIN
			dub_package ON dub_package.id = package_version.dub_package_id
		WHERE
			d_symbols.id = ?
			AND
			is_latest = true
		", i)) {
		return DeclElement(row["fully_qualified_name"], row["summary"], row["url_name"], row["id"].to!int, "", 0, row["package_subdomain"]);
	}
	return DeclElement.init;
}

/// One row of `auto_generated_tags`: a symbol id and the score of the tag on it.
static struct TermElement {
	int declId;
	int score;
}

/// A symbol as needed for displaying a search result.
static struct DeclElement {
	string name;
	string description; // actually HTML
	string link;
	int id;
	string type;
	int parent;
	string packageName;
}

/// A scored search result: the symbol id, its final score, and its details.
static struct Magic {
	int declId;
	int score;
	DeclElement decl;
}

/++
	Computes the score bonus a result gets from the package it lives in.

	Params:
		details = the symbol being scored
		preferredProject = package the user asked to favour; may be empty

	Returns:
		150 if the symbol is in `preferredProject`, plus 50 for phobos or
		druntime, plus 30 for arsd-official.
+/
int getProjectAdjustment(DeclElement details, string preferredProject) {
	int projectAdjustment;
	if(preferredProject.length) {
		if(preferredProject == details.packageName)
			projectAdjustment = 150;
	}
	if(details.packageName == "phobos" || details.packageName == "druntime")
		projectAdjustment += 50;
	if(details.packageName == "arsd-official")
		projectAdjustment += 30;

	return projectAdjustment;
}

/++
	Runs the search and returns the scored, unsorted candidate list.

	The search string is split on spaces. Each term is looked up as-is,
	lower-cased, and stemmed; scores are accumulated per symbol and halved
	once for every term the symbol did not match. If nothing matched at all,
	it falls back to an edit-distance comparison against fully qualified names.

	Params:
		search = the raw search string
		preferredProject = package whose symbols get a bonus; may be empty

	Returns:
		one entry per candidate symbol, in no particular order.
+/
Magic[] getPossibilities(string search, string preferredProject) {
	// The per-symbol hit mask has one bit per term, so only this many terms
	// can be tracked. Capping here also bounds the number of tag queries a
	// single request can trigger.
	enum maxTerms = 32;

	int[int] declScores;

	uint[int] declHits;

	ps.PorterStemmer s;

	// Collect the non-empty terms while keeping their order: the first term
	// is scored differently from the rest in addHit, so order matters.
	string[] terms;
	foreach(piece; search.split(" ")) { // ~ search.split(".");
		if(piece.length == 0)
			continue;
		if(terms.length == maxTerms)
			break;
		terms ~= piece;
	}

	void addHit(TermElement item, size_t idx) {
		if(idx == 0) {
			declScores[item.declId] += item.score;
			return;
		}
		if(item.declId in declScores) {
			declScores[item.declId] += 25; // hit both terms
			declScores[item.declId] += item.score;
		} else {
			// only hit one term...
			declScores[item.declId] += item.score / 2;
		}
	}

	// Scores every symbol tagged with `tag` and marks term `idx` as matched.
	void recordHits(string tag, size_t idx) {
		foreach(item; resultsByTerm(tag)) {
			addHit(item, idx);
			declHits[item.declId] |= 1u << idx;
		}
	}

	// On each term, we want to check for exact match and fuzzy match / natural language match.
	// FIXME: if something matches both it should be really strong. see time_t vs "time_t std.datetime"
	foreach(idx, term; terms) {
		assert(term.length > 0);

		recordHits(term, idx);

		auto l = term.toLower;
		if(l != term)
			recordHits(l, idx);

		auto st = s.stem(l).idup;
		if(st != l)
			recordHits(st, idx);
	}

	Magic[] magic;

	foreach(decl, score; declScores) {
		auto hits = declHits[decl];
		// halve the score once for each term this symbol failed to match
		foreach(idx, term; terms) {
			if(!(hits & (1u << idx)))
				score /= 2;
		}
		auto details = getDecl(decl);
		int projectAdjustment = getProjectAdjustment(details, preferredProject);
		magic ~= Magic(decl, score + projectAdjustment, details);
	}

	if(magic.length == 0) {
		// Nothing matched by tag: fall back to names within edit distance 2.
		foreach(term; terms) {
			if(term.length == 0) continue;
			term = term.toLower();
			// NOTE(review): the LIKE prefilter is case-sensitive while the
			// comparison below lower-cases both sides, so names starting with
			// an upper-case letter are never considered - confirm intent.
			// NOTE(review): term[0 .. 1] takes one byte, not one character.
			foreach(row; db.query("SELECT id, fully_qualified_name FROM d_symbols WHERE fully_qualified_name LIKE ?", term[0 .. 1] ~ "%")) {
				string name = row[1];
				int id = row[0].to!int;
				import std.algorithm;
				name = name.toLower;
				auto dist = cast(int) levenshteinDistance(name, term);
				if(dist <= 2) {
					auto details = getDecl(id);
					int projectAdjustment = getProjectAdjustment(details, preferredProject);
					magic ~= Magic(id, projectAdjustment + (3 - dist), details);
				}
			}
		}
	}

	// boosts based on topography
	foreach(ref item; magic) {
		auto decl = item.decl;
		if(decl.type == "module") {
			// if it is a module, give it moar points
			item.score += 8;
			continue;
		}
		// Presumably meant for direct children of a module. The previous code
		// re-fetched decl.id itself, which repeats the test above at the cost
		// of a query per result. Skip the lookup when there is no parent.
		if(decl.parent != 0 && getDecl(decl.parent).type == "module") {
			item.score += 5;
		}
	}

	return magic;
}

import std.uri;

/++
	CGI entry point.

	Under `version(vps)` it serves the static home page, script and
	stylesheet. In all builds it redirects a set of shortcut keywords
	(bugzilla, dip, wiki, ...) and otherwise renders the search results
	page for the query.

	The query is taken from the `q` parameter, then `searchTerm`, then the
	raw query string, and finally (vps only) the request path.
+/
void searcher(Cgi cgi) {

	auto search = cgi.request("q", cgi.request("searchTerm", cgi.queryString));

	// Declared outside the version block because it is read unconditionally
	// below; it simply stays empty in non-vps builds.
	string path;

	version(vps) {
		path = cgi.requestUri;

		auto q = path.indexOf("?");
		if(q != -1) {
			path = path[0 .. q];
		}

		if(path.length && path[0] == '/')
			path = path[1 .. $];

		if(path.length == 0 && search.length == 0) {
			import std.file;

			cgi.write(std.file.read("/dpldocs-build/search-home.html"), true);
			return;
		}


		if(path == "script.js") {
			import std.file;
			cgi.setResponseContentType("text/javascript");
			cgi.write(std.file.read("/dpldocs-build/script.js"), true);
			return;

		}

		if(path == "style.css") {
			import std.file;
			cgi.setResponseContentType("text/css");
			cgi.write(std.file.read("/dpldocs-build/style.css"), true);
			return;
		}
	}

	alias searchTerm = search;

	if(search.length == 0 && path.length)
		search = path;

	if(search.length == 0) {
		cgi.setResponseLocation("/");
		return;
	}
	auto parts = search.split(" ");
	switch(parts[0].toLower()) {
		case "auto-ref-return-function-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#auto-ref-functions");
			return;
		case "auto-function-return-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#auto-functions");
			return;
		case "ref-function-return-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#ref-functions");
			return;
		case "bugzilla":
			auto url = "http://d.puremagic.com/issues/";
			if(parts.length > 1)
				url ~= "show_bug.cgi?id=" ~ parts[1];
			cgi.setResponseLocation(url);
			return;
		case "dip":
			auto url = "http://wiki.dlang.org/DIPs";
			if(parts.length > 1)
				url = "http://wiki.dlang.org/DIP" ~ parts[1];
			cgi.setResponseLocation(url);
			return;
		case "wiki":
			auto url = "http://wiki.dlang.org/";
			// The query string needs its leading '?', and the words are
			// rejoined with the single space they were split on.
			if(parts.length > 1)
				url ~= "?search=" ~ std.uri.encodeComponent(join(parts[1..$], " ")) ~ "&go=Go&title=Special%3ASearch";
			cgi.setResponseLocation(url);
			return;
		case "faqs":
		case "faq":
			cgi.setResponseLocation("http://wiki.dlang.org/FAQs");
			return;
		case "template-alias-parameter":
			cgi.setResponseLocation("https://dlang.org/spec/template.html#aliasparameters");
			return;
		case "is-expression":
			cgi.setResponseLocation("https://dlang.org/spec/expression.html#IsExpression");
			return;
		case "typeof-expression":
			cgi.setResponseLocation("https://dlang.org/spec/declaration.html#Typeof");
			return;
		case "oldwiki":
			auto url = "http://prowiki.org/wiki4d/wiki.cgi";
			if(parts.length > 1)
				url ~= "?formpage=Search&id=Search&search=" ~ std.uri.encodeComponent(join(parts[1..$], " "));
			cgi.setResponseLocation(url);
			return;
		default:
			// just continue
			version(vps) { } else {
				// The term comes straight from the request, so only probe the
				// filesystem when it cannot step outside the docs directory.
				bool plainName = true;
				foreach(char ch; searchTerm) {
					if(ch == '/' || ch == '\0') {
						plainName = false;
						break;
					}
				}

				if(plainName) {
					if(std.file.exists("/var/www/dpldocs.info/experimental-docs/" ~ searchTerm ~ ".1.html")) {
						cgi.setResponseLocation("/experimental-docs/" ~ searchTerm ~ ".1.html");
						return;
					}
					if(std.file.exists("/var/www/dpldocs.info/experimental-docs/" ~ searchTerm ~ ".html")) {
						cgi.setResponseLocation("/experimental-docs/" ~ searchTerm ~ ".html");
						return;
					}
				}
				// redirect to vps
				if("local" !in cgi.get) {
					cgi.setResponseLocation("//search.dpldocs.info/?q=" ~ std.uri.encodeComponent(searchTerm));
					// the redirect is the whole response; do not also run the search
					return;
				}
			}
	}


	Magic[] magic = getPossibilities(search, cgi.request("project"));

	sort!((a, b) => a.score > b.score)(magic);

	// adjustments based on previously showing results
	// (this changes the displayed score only; the list is not re-sorted)
	{
		bool[int] seenIds;
		foreach(ref item; magic) {
			auto decl = item.decl;
			if(decl.parent in seenIds)
				item.score -= 8;
			seenIds[decl.id] = true;
		}
	}

	auto document = new Document();
	version(vps) {
		import std.file;
		document.parseUtf8(readText("/dpldocs-build/skeleton.html"), true, true);
		document.title = "Dub Documentation Search";
	} else
		document.parseUtf8(import("skeleton.html"), true, true);
	// NOTE(review): this runs in every build, so it replaces the vps title
	// set just above - confirm which title is intended.
	document.title = "Search Results";

	auto form = document.requireElementById!Form("search");
	form.setValue("searchTerm", search);

	version(vps) {
		// intentionally blank
	} else {
		auto l = document.requireSelector("link");
		l.href = "/experimental-docs/" ~ l.href;
		l = document.requireSelector("script[src]");
		l.src = "/experimental-docs/" ~ l.src;
	}

	auto pc = document.requireSelector("#page-content");
	pc.addChild("h1", "Search Results");
	auto ml = pc.addChild("dl");
	ml.className = "member-list";

	// Builds a dotted name by walking up through parents until a module,
	// a symbol without a parent, or a missing symbol is reached.
	string getFqn(DeclElement i) {
		string n;
		while(true) {
			if(n.length) n = "." ~ n;
			n = i.name ~ n;
			if(i.type == "module")
				break;
			if(i.parent == 0)
				break;
			i = getDecl(i.parent);
			if(i.id == 0)
				break;
		}
		return n;
	}

	bool[string] alreadyPresent;
	int count = 0;
	foreach(idx, item; magic) {
		auto decl = item.decl;
		if(decl.id == 0) continue; // should never happen
		version(vps)
			auto link = "//"~decl.packageName~".dpldocs.info/" ~ decl.link;
		else
			auto link = "//dpldocs.info/experimental-docs/" ~ decl.link;
		// a link that is already rooted is used as-is
		if(decl.link.length && decl.link[0] == '/')
			link = decl.link;
		auto fqn = getFqn(decl);
		// show each fully qualified name only once
		if(fqn in alreadyPresent)
			continue;
		alreadyPresent[fqn] = true;
		auto dt = ml.addChild("dt");
		dt.addClass("search-result");
		dt.addChild("span", decl.packageName).addClass("project-name");
		dt.addChild("br");
		// zero-width spaces after the dots let long names wrap
		dt.addChild("a", fqn.replace(".", ".\u200B"), link);
		dt.dataset.score = to!string(item.score);
		auto html = decl.description;
		//auto d = new Document(html);
		//writeln(d.root.innerText.replace("\n", " "));
		//writeln();

		// FIXME fix relative links from here
		ml.addChild("dd", Html(html));
		count++;

		if(count >= 20)
			break;
	}

	cgi.write(document.toString, true);
}

mixin GenericMain!(searcher);