// FIXME: add +proj and -proj to adjust project results

// dmdi -g -debug -version=vps locate stemmer.d -oflocate_vps -version=scgi

// my local config assumes this will be on port 9653

module adrdox.locate;

import arsd.postgres;

import ps = PorterStemmer;
import arsd.cgi;
import arsd.dom;
import std.stdio;
import std.file;
import std.conv : to;
import std.algorithm : sort;
import std.string : toLower, replace, split;
import std.uri;

/// Lazily created connection shared by every query in this module.
PostgreSql db_;

/// Flat bonus added to every result's score.
enum projectAdjustment = 0;

/// Returns the shared database connection, opening it on first use.
PostgreSql db() {
	if(db_ is null) {
		db_ = new PostgreSql("dbname=adrdox");
	}
	return db_;
}

/++
	Looks up `term` in the `auto_generated_tags` table.

	Returns: at most 15 (symbol id, score) pairs, highest score first.
+/
TermElement[] resultsByTerm(string term) {
	TermElement[] ret;
	foreach(row; db.query("SELECT d_symbols_id, score FROM auto_generated_tags WHERE tag = ? ORDER BY score DESC LIMIT 15", term))
		ret ~= TermElement(to!int(row[0]), to!int(row[1]));
	return ret;
}

/++
	Loads the symbol with id `i`, joined with the package it belongs to.

	Only rows with `is_latest = true` are considered.

	Returns: the first matching row as a `DeclElement`, or `DeclElement.init`
	(whose `id` is 0) when nothing matches. `type` is always left empty and
	`parent` is always 0 here.
+/
DeclElement getDecl(int i) {
	foreach(row; db.query("
		SELECT
			d_symbols.*,
			dub_package.url_name AS package_subdomain
		FROM
			d_symbols
		INNER JOIN
			package_version ON package_version.id = d_symbols.package_version_id
		INNER JOIN
			dub_package ON dub_package.id = package_version.dub_package_id
		WHERE
			d_symbols.id = ?
			AND
			is_latest = true
		", i)) {
		return DeclElement(row["fully_qualified_name"], row["summary"], row["url_name"], row["id"].to!int, "", 0, row["package_subdomain"]);
	}
	return DeclElement.init;
}

/// One tag hit: which symbol matched and how strongly.
static struct TermElement {
	int declId;
	int score;
}

/// A symbol as it is shown in the result list.
static struct DeclElement {
	string name;
	string description; // actually HTML
	string link;
	int id;
	string type;
	int parent;
	string packageName;
}

/// A scored search candidate together with its loaded declaration.
static struct Magic {
	int declId;
	int score;
	DeclElement decl;
}

/++
	Scores every symbol that matches the space-separated terms in `search`.

	Each term is looked up verbatim, lowercased, and stemmed. Hits on the
	first term count in full; hits on later terms count in full (plus a
	bonus) only when the symbol has already been seen, otherwise at half
	value. A symbol's score is then halved once for every term it did not
	hit at all.

	If nothing matches, every symbol whose fully qualified name starts with
	the same first character as a term and lies within Levenshtein distance
	2 of it is offered instead.

	Returns: the candidates in no particular order; the caller sorts them.
+/
Magic[] getPossibilities(string search) {
	int[int] declScores;

	// bit N is set when the declaration was hit by term N
	// NOTE(review): the mask is a 32-bit int, so terms past the 31st cannot
	// be tracked reliably
	int[int] declHits;

	ps.PorterStemmer s;

	// Drop empty terms (produced by leading, trailing or doubled spaces)
	// while keeping the remaining terms in the order the user typed them:
	// the first term is scored differently from the rest, so order matters.
	string[] terms;
	foreach(piece; search.split(" ")) { // ~ search.split(".");
		if(piece.length)
			terms ~= piece;
	}

	void addHit(TermElement item, size_t idx) {
		if(idx == 0) {
			declScores[item.declId] += item.score;
			return;
		}
		if(item.declId in declScores) {
			declScores[item.declId] += 25; // hit both terms
			declScores[item.declId] += item.score;
		} else {
			// only hit one term...
			declScores[item.declId] += item.score / 2;
		}
	}

	// Scores every tag hit for `key` and remembers that term `idx` matched.
	void recordHits(string key, size_t idx) {
		foreach(item; resultsByTerm(key)) {
			addHit(item, idx);
			declHits[item.declId] |= 1 << idx;
		}
	}

	// On each term, we want to check for exact match and fuzzy match / natural language match.
	// FIXME: if something matches both it should be really strong. see time_t vs "time_t std.datetime"
	foreach(idx, term; terms) {
		assert(term.length > 0);

		recordHits(term, idx);

		auto l = term.toLower;
		if(l != term)
			recordHits(l, idx);

		auto st = s.stem(l).idup;
		if(st != l)
			recordHits(st, idx);
	}

	Magic[] magic;

	foreach(decl, score; declScores) {
		auto hits = declHits[decl];
		// halve the score once per term this declaration missed
		foreach(idx, term; terms) {
			if(!(hits & (1 << idx)))
				score /= 2;
		}
		magic ~= Magic(decl, score + projectAdjustment, getDecl(decl));
	}

	if(magic.length == 0) {
		// nothing tagged: fall back to near-miss spellings of symbol names
		import std.algorithm : levenshteinDistance;

		foreach(term; terms) {
			term = term.toLower();
			foreach(row; db.query("SELECT id, fully_qualified_name FROM d_symbols WHERE fully_qualified_name LIKE ?", term[0 .. 1] ~ "%")) {
				string name = row[1];
				int id = row[0].to!int;
				name = name.toLower;
				auto dist = cast(int) levenshteinDistance(name, term);
				if(dist <= 2)
					magic ~= Magic(id, projectAdjustment + (3 - dist), getDecl(id));
			}
		}
	}

	// boosts based on topography
	foreach(ref item; magic) {
		auto decl = item.decl;
		if(decl.type == "module") {
			// if it is a module, give it moar points
			item.score += 8;
			continue;
		}
		// A direct child of a module gets a smaller boost. Look at the
		// parent (re-fetching decl itself could never match, since its type
		// was just checked above) and skip the query when there is none.
		// getDecl currently fills in neither type nor parent, so neither
		// boost fires until it does.
		if(decl.parent != 0 && getDecl(decl.parent).type == "module") {
			item.score += 5;
		}
	}

	return magic;
}

/++
	Request handler: serves the static front-end files (vps build only),
	redirects a handful of keyword shortcuts to dlang.org / wiki pages, and
	otherwise renders up to 20 search results into the skeleton page.

	The query is read from the `q` parameter, then `searchTerm`, then the raw
	query string; in the vps build the request path is used as a last resort.
+/
void searcher(Cgi cgi) {

	auto search = cgi.request("q", cgi.request("searchTerm", cgi.queryString));

	// Declared outside the version block so the fallback below compiles in
	// every build; only the vps build ever fills it in.
	string path;

	version(vps) {
		path = cgi.requestUri;

		auto q = path.indexOf("?");
		if(q != -1) {
			path = path[0 .. q];
		}

		if(path.length && path[0] == '/')
			path = path[1 .. $];

		if(path.length == 0 && search.length == 0) {
			cgi.write(std.file.read("/dpldocs-build/search-home.html"), true);
			return;
		}

		if(path == "script.js") {
			cgi.setResponseContentType("text/javascript");
			cgi.write(std.file.read("/dpldocs-build/script.js"), true);
			return;
		}

		if(path == "style.css") {
			cgi.setResponseContentType("text/css");
			cgi.write(std.file.read("/dpldocs-build/style.css"), true);
			return;
		}
	}

	alias searchTerm = search;

	if(search.length == 0 && path.length)
		search = path;

	if(search.length == 0) {
		cgi.setResponseLocation("/");
		return;
	}

	// keyword shortcuts: the first word selects the target, the rest (if
	// any) is passed along as its argument
	auto parts = search.split(" ");
	switch(parts[0].toLower()) {
		case "auto-ref-return-function-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#auto-ref-functions");
			return;
		case "auto-function-return-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#auto-functions");
			return;
		case "ref-function-return-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#ref-functions");
			return;
		case "bugzilla":
			auto url = "http://d.puremagic.com/issues/";
			if(parts.length > 1)
				url ~= "show_bug.cgi?id=" ~ std.uri.encodeComponent(parts[1]);
			cgi.setResponseLocation(url);
			return;
		case "dip":
			auto url = "http://wiki.dlang.org/DIPs";
			if(parts.length > 1)
				url = "http://wiki.dlang.org/DIP" ~ std.uri.encodeComponent(parts[1]);
			cgi.setResponseLocation(url);
			return;
		case "wiki":
			auto url = "http://wiki.dlang.org/";
			if(parts.length > 1)
				// the '?' starts the query string; without it the search
				// parameters would be taken as part of the path
				url ~= "?search=" ~ std.uri.encodeComponent(join(parts[1..$], " ")) ~ "&go=Go&title=Special%3ASearch";
			cgi.setResponseLocation(url);
			return;
		case "faqs":
		case "faq":
			cgi.setResponseLocation("http://wiki.dlang.org/FAQs");
			return;
		case "template-alias-parameter":
			cgi.setResponseLocation("https://dlang.org/spec/template.html#aliasparameters");
			return;
		case "is-expression":
			cgi.setResponseLocation("https://dlang.org/spec/expression.html#IsExpression");
			return;
		case "typeof-expression":
			cgi.setResponseLocation("https://dlang.org/spec/declaration.html#Typeof");
			return;
		case "oldwiki":
			auto url = "http://prowiki.org/wiki4d/wiki.cgi";
			if(parts.length > 1)
				url ~= "?formpage=Search&id=Search&search=" ~ std.uri.encodeComponent(join(parts[1..$], " "));
			cgi.setResponseLocation(url);
			return;
		default:
			// just continue
			version(vps) { } else {
				if(std.file.exists("/var/www/dpldocs.info/experimental-docs/" ~ searchTerm ~ ".1.html")) {
					cgi.setResponseLocation("/experimental-docs/" ~ searchTerm ~ ".1.html");
					return;
				}
				if(std.file.exists("/var/www/dpldocs.info/experimental-docs/" ~ searchTerm ~ ".html")) {
					cgi.setResponseLocation("/experimental-docs/" ~ searchTerm ~ ".html");
					return;
				}
				// redirect to vps
				// NOTE(review): there is no return after this redirect, so
				// the local search below still runs and is written out
				if("local" !in cgi.get)
					cgi.setResponseLocation("//search.dpldocs.info/?q=" ~ std.uri.encodeComponent(searchTerm));
			}
	}


	Magic[] magic = getPossibilities(search);

	sort!((a, b) => a.score > b.score)(magic);

	// adjustments based on previously showing results
	// (applied after sorting; the list is not re-sorted afterwards)
	{
		bool[int] alreadyPresent;
		foreach(ref item; magic) {
			auto decl = item.decl;
			if(decl.parent in alreadyPresent)
				item.score -= 8;
			alreadyPresent[decl.id] = true;
		}
	}

	auto document = new Document();
	version(vps) {
		document.parseUtf8(readText("/dpldocs-build/skeleton.html"), true, true);
		document.title = "Dub Documentation Search";
	} else
		document.parseUtf8(import("skeleton.html"), true, true);
	// NOTE(review): this runs in every build, so it replaces the vps title
	// set just above - confirm which title is wanted
	document.title = "Search Results";

	auto form = document.requireElementById!Form("search");
	form.setValue("searchTerm", search);

	version(vps) {
		// intentionally blank
	} else {
		auto l = document.requireSelector("link");
		l.href = "/experimental-docs/" ~ l.href;
		l = document.requireSelector("script[src]");
		l.src = "/experimental-docs/" ~ l.src;
	}

	auto pc = document.requireSelector("#page-content");
	pc.addChild("h1", "Search Results");
	auto ml = pc.addChild("dl");
	ml.className = "member-list";

	// Builds a dotted name by walking up the parent chain, stopping at a
	// module, at a declaration without a parent, or at one that fails to load.
	string getFqn(DeclElement i) {
		string n;
		while(true) {
			if(n) n = "." ~ n;
			n = i.name ~ n;
			if(i.type == "module")
				break;
			if(i.parent == 0)
				break;
			i = getDecl(i.parent);
			if(i.id == 0)
				break;
		}
		return n;
	}

	bool[string] alreadyPresent;
	int count = 0;
	foreach(idx, item; magic) {
		auto decl = item.decl;
		if(decl.id == 0) continue; // should never happen
		version(vps)
			auto link = "http://"~decl.packageName~".dpldocs.info/" ~ decl.link;
		else
			auto link = "http://dpldocs.info/experimental-docs/" ~ decl.link;
		auto fqn = getFqn(decl);
		// show each fully qualified name only once
		if(fqn in alreadyPresent)
			continue;
		alreadyPresent[fqn] = true;
		auto dt = ml.addChild("dt");
		dt.addClass("search-result");
		dt.addChild("span", decl.packageName).addClass("project-name");
		dt.addChild("br");
		// a zero-width space after each dot lets long names wrap
		dt.addChild("a", fqn.replace(".", ".\u200B"), link);
		dt.dataset.score = to!string(item.score);
		auto html = decl.description;
		//auto d = new Document(html);
		//writeln(d.root.innerText.replace("\n", " "));
		//writeln();

		// FIXME fix relative links from here
		ml.addChild("dd", Html(html));
		count++;

		if(count >= 20)
			break;
	}

	cgi.write(document.toString, true);
}

mixin GenericMain!(searcher);