1 // FIXME: add +proj and -proj to adjust project results
2 
3 // dmdi -g -debug -version=vps locate stemmer.d -oflocate_vps -version=scgi
4 
5 // my local config assumes this will be on port 9653
6 
7 module adrdox.locate;
8 
9 import arsd.postgres;
10 
11 import ps = PorterStemmer;
12 import arsd.cgi;
13 import arsd.dom;
14 import std.stdio;
15 import std.file;
16 import std.conv : to;
17 import std.algorithm : sort;
18 import std.string : toLower, replace, split;
19 
/// Cached connection handle; stays null until db() is first called.
PostgreSql db_;

/++
	Returns the shared connection to the `adrdox` database,
	opening it the first time it is requested.
+/
PostgreSql db() {
	if(db_ !is null)
		return db_;

	db_ = new PostgreSql("dbname=adrdox");
	return db_;
}
28 
/++
	Looks up a single search term and returns the symbols it matches.

	Two sources are consulted, in this order:
	$(LIST
		* hand written tags (case-insensitive match), carrying the score stored with the tag
		* symbols of the latest package versions whose fully qualified name, plain name,
		  or name relative to its module equals the term; an exact fully qualified
		  match scores 50, any other match scores 25
	)

	The auto generated tag lookup is compiled out via `version(none)`.

	Params:
		term = one word of the user's query

	Returns:
		one TermElement per matching row; a symbol may appear more than once.
+/
TermElement[] resultsByTerm(string term) {
	TermElement[] matches;

	// hand written tags first
	foreach(tagRow; db.query("SELECT d_symbols.id, score FROM hand_written_tags INNER JOIN d_symbols ON d_symbol_fully_qualified_name = fully_qualified_name WHERE tag ILIKE ? ORDER BY score DESC", term)) {
		matches ~= TermElement(to!int(tagRow[0]), to!int(tagRow[1]));
	}

	// then direct name matches among the latest package versions
	foreach(symbolRow; db.query("
		SELECT
			d_symbols.id, fully_qualified_name
		FROM
			d_symbols
		INNER JOIN
			package_version ON package_version_id = package_version.id
		WHERE
			is_latest = true
			AND
			(
				fully_qualified_name = ?
				OR
				name = ?
				OR
				substring(fully_qualified_name, length(module_name) + 2) = ?
			)
	", term, term, term)) {
		int nameScore;
		if(symbolRow[1] == term)
			nameScore = 50; // the whole fully qualified name was typed
		else
			nameScore = 25;
		matches ~= TermElement(to!int(symbolRow[0]), nameScore);
	}

	// disabled: auto generated tags
	version(none)
	foreach(autoRow; db.query("
		SELECT
			d_symbols_id, score
		FROM
			auto_generated_tags
		INNER JOIN
			package_version ON package_version_id = package_version.id
		WHERE
			tag ILIKE ?
			AND
			is_latest = true
		ORDER BY
			score + (case (dub_package_id = 6 or dub_package_id = 9) when true then 5 else 0 end) DESC
		", term))
		matches ~= TermElement(to!int(autoRow[0]), to!int(autoRow[1]));

	return matches;
}
73 
/++
	Loads the symbol with database id `i`, provided it belongs to the
	latest version of its package.

	The `type` and `parent` fields of the result are not read from the
	database here; they are always set to `""` and `0`.

	Params:
		i = `d_symbols.id` of the wanted symbol

	Returns:
		the populated DeclElement, or `DeclElement.init` (whose `id` is 0)
		when no row matches.
+/
DeclElement getDecl(int i) {
	auto rows = db.query("
		SELECT
			d_symbols.*,
			dub_package.url_name AS package_subdomain
		FROM
			d_symbols
		INNER JOIN
			package_version ON package_version.id = d_symbols.package_version_id
		INNER JOIN
			dub_package ON dub_package.id = package_version.dub_package_id
		WHERE
			d_symbols.id = ?
			AND
			is_latest = true
		", i);

	// only the first row is ever used
	foreach(found; rows) {
		return DeclElement(
			found["fully_qualified_name"],
			found["summary"],
			found["url_name"],
			found["id"].to!int,
			"", // type: not filled in
			0, // parent: not filled in
			found["package_subdomain"]
		);
	}

	return DeclElement.init;
}
94 
/// One hit of a search term: which symbol matched and how strongly.
struct TermElement {
	/// Database id of the matched symbol.
	int declId;
	/// Weight of this match; larger means more relevant.
	int score;
}
99 
/// Details of one documented symbol as shown in the search results.
/// Field order matters: instances are built positionally.
struct DeclElement {
	/// Name of the symbol.
	string name;
	/// Summary text; actually HTML.
	string description;
	/// Link to the symbol's documentation page.
	string link;
	/// Database id; 0 in a default-initialized value.
	int id;
	/// Kind of symbol; compared against "module" by callers.
	string type;
	/// Id of the parent symbol, 0 when there is none.
	int parent;
	/// Name of the package the symbol belongs to.
	string packageName;
}
109 
/// A ranked search candidate: the symbol id, its final score,
/// and the symbol details loaded for display.
struct Magic {
	/// Database id of the symbol.
	int declId;
	/// Combined relevance score used for ordering.
	int score;
	/// Loaded details of the symbol.
	DeclElement decl;
}
115 
/++
	Computes the score bonus a result receives because of the package it lives in.

	Params:
		details = the symbol being ranked; only `packageName` is examined
		preferredProject = package the user asked to favor, or empty for none

	Returns:
		150 if the symbol is in the preferred project (0 otherwise), plus
		50 for "phobos" or "druntime", or plus 30 for "arsd-official".
+/
int getProjectAdjustment(DeclElement details, string preferredProject) {
	const pkg = details.packageName;

	int bonus = 0;
	if(preferredProject.length != 0 && pkg == preferredProject)
		bonus = 150;

	switch(pkg) {
		case "phobos":
		case "druntime":
			bonus += 50;
			break;
		case "arsd-official":
			bonus += 30;
			break;
		default:
			break;
	}

	return bonus;
}
129 
/++
	Builds the unsorted list of candidate results for a query.

	The query is split on spaces; every non-blank word is looked up with
	resultsByTerm and the scores are accumulated per symbol. A symbol's score
	is halved once for every term it did not match. If nothing matches at all,
	up to 50 symbols whose fully qualified name compares greater than each term
	are offered instead with a small fixed score.

	Params:
		search = the raw query text
		preferredProject = package name to favor, or empty for none

	Returns:
		one Magic per candidate symbol, in no particular order. Entries whose
		`decl.id` is 0 can occur when getDecl finds no row for a symbol.
+/
Magic[] getPossibilities(string search, string preferredProject) {
	// Matches are recorded as one bit per term in a uint, so only this many
	// terms can be told apart. Shifting by 32 or more would be invalid, so
	// any further terms are ignored.
	enum maxTerms = 32;

	// declId -> accumulated score
	int[int] declScores;

	// declId -> bitmask of the term indexes that matched it
	uint[int] declHits;

	// ps.PorterStemmer s;

	// Drop blank terms but keep the others in the order they were typed:
	// addHit treats the first term differently, so stray spaces in the query
	// must not be able to reorder them.
	string[] terms;
	foreach(candidate; search.split(" ")) { // ~ search.split(".");
		if(candidate.strip.length == 0)
			continue;
		if(terms.length == maxTerms)
			break;
		terms ~= candidate;
	}

	void addHit(TermElement item, size_t idx) {
		if(idx == 0) {
			declScores[item.declId] += item.score;
			return;
		}
		if(item.declId in declScores) {
			//declScores[item.declId] += 25; // hit both terms
			declScores[item.declId] += item.score;
		} else {
			// only hit one term...
			declScores[item.declId] += item.score / 2;
		}
	}

	// On each term, we want to check for exact match and fuzzy match / natural language match.
	// FIXME: if something matches both it should be really strong. see time_t vs "time_t std.datetime"
	foreach(idx, term; terms) {
		assert(term.length > 0);

		foreach(item; resultsByTerm(term)) {
			addHit(item, idx);
			declHits[item.declId] |= 1u << idx;
		}
		/+
		auto st = s.stem(term.toLower).idup;
		if(st != l)
			foreach(item; resultsByTerm(st)) {
				addHit(item, idx);
				declHits[item.declId] |= 1u << idx;
			}
		+/
	}

	Magic[] magic;

	foreach(decl, score; declScores) {
		// every key of declScores was also recorded in declHits above
		auto hits = declHits[decl];
		foreach(idx, term; terms) {
			// penalize once per term this symbol did not match
			if(!(hits & (1u << idx)))
				score /= 2;
		}
		auto details = getDecl(decl);
		int projectAdjustment = getProjectAdjustment(details, preferredProject);
		magic ~= Magic(decl, score + projectAdjustment, details);
	}

	if(magic.length == 0) {
		// Nothing matched: fall back to names that compare greater than the term.
		// NOTE(review): the query has no ORDER BY, so which 50 rows come back is
		// up to the database - confirm whether nearest-by-name was intended.
		enum fallbackScore = 3;
		foreach(term; terms) {
			//term = term.toLower();
			foreach(row; db.query("SELECT id, fully_qualified_name FROM d_symbols WHERE fully_qualified_name > ? LIMIT 50", term)) {
				int id = row[0].to!int;
				auto details = getDecl(id);
				int projectAdjustment = getProjectAdjustment(details, preferredProject);
				magic ~= Magic(id, projectAdjustment + fallbackScore, details);
			}
		}
	}

	// boosts based on topography
	foreach(ref item; magic) {
		auto decl = item.decl;
		if(decl.type == "module") {
			// if it is a module, give it moar points
			item.score += 8;
			continue;
		}
		// Direct members of a module get a smaller boost. Look at the parent
		// rather than re-loading the row already held in item.decl, and skip
		// the query entirely when there is no parent.
		// NOTE(review): getDecl currently leaves type empty and parent 0, so
		// neither boost fires until it fills those fields in.
		if(decl.parent != 0 && getDecl(decl.parent).type == "module") {
			item.score += 5;
		}
	}

	return magic;
}
238 
239 import std.uri;
240 
/++
	CGI request handler for the documentation search.

	The query is taken from the `q` parameter, then `searchTerm`, then the raw
	query string, and finally from the request path. A handful of keyword
	shortcuts (bugzilla, dip, wiki, faq, ...) redirect to external pages;
	everything else is ranked through getPossibilities and rendered into the
	skeleton page as a list of at most 20 distinct results.

	In `version(vps)` builds the handler also serves the search home page,
	`script.js` and `style.css` from `/dpldocs-build/`. In other builds a
	search is redirected to search.dpldocs.info unless the `local` GET
	parameter is present.

	Params:
		cgi = the request/response object
+/
void searcher(Cgi cgi) {

	auto search = cgi.request("q", cgi.request("searchTerm", cgi.queryString));

	// version blocks do not open a scope, so `path` stays visible below
	version(vps) {
		string path = cgi.requestUri;

		auto q = path.indexOf("?");
		if(q != -1) {
			path = path[0 .. q];
		}

		if(path.length && path[0] == '/')
			path = path[1 .. $];

		if(path.length == 0 && search.length == 0) {
			import std.file;

			cgi.write(std.file.read("/dpldocs-build/search-home.html"), true);
			return;
		}


		if(path == "script.js") {
			import std.file;
			cgi.setResponseContentType("text/javascript");
			cgi.write(std.file.read("/dpldocs-build/script.js"), true);
			return;

		}

		if(path == "style.css") {
			import std.file;
			cgi.setResponseContentType("text/css");
			cgi.write(std.file.read("/dpldocs-build/style.css"), true);
			return;
		}
	} else {
		string path = cgi.requestUri;

		auto q = path.indexOf("?");
		if(q != -1) {
			path = path[0 .. q];
		}

		if(path.length && path[0] == '/')
			path = path[1 .. $];


	}

	alias searchTerm = search;

	// with no explicit query, the path itself is the search
	if(search.length == 0 && path.length)
		search = path;

	if(search.length == 0) {
		cgi.setResponseLocation("/");
		return;
	}
	auto parts = search.split(" ");
	switch(parts[0].toLower()) {
		case "auto-ref-return-function-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#auto-ref-functions");
			return;
		case "auto-function-return-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#auto-functions");
			return;
		case "ref-function-return-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#ref-functions");
			return;
		case "bugzilla":
			auto url = "http://d.puremagic.com/issues/";
			// user input goes into a response header, so encode it
			if(parts.length > 1)
				url ~= "show_bug.cgi?id=" ~ std.uri.encodeComponent(parts[1]);
			cgi.setResponseLocation(url);
			return;
		case "dip":
			auto url = "http://wiki.dlang.org/DIPs";
			if(parts.length > 1)
				url = "http://wiki.dlang.org/DIP" ~ std.uri.encodeComponent(parts[1]);
			cgi.setResponseLocation(url);
			return;
		case "wiki":
			auto url = "http://wiki.dlang.org/";
			// the remaining words form the wiki query, separated by single spaces
			if(parts.length > 1)
				url ~= "?search=" ~ std.uri.encodeComponent(join(parts[1..$], " ")) ~ "&go=Go&title=Special%3ASearch";
			cgi.setResponseLocation(url);
			return;
		case "faqs":
		case "faq":
			cgi.setResponseLocation("http://wiki.dlang.org/FAQs");
			return;
		case "template-alias-parameter":
			cgi.setResponseLocation("https://dlang.org/spec/template.html#aliasparameters");
			return;
		case "is-expression":
			cgi.setResponseLocation("https://dlang.org/spec/expression.html#IsExpression");
			return;
		case "typeof-expression":
			cgi.setResponseLocation("https://dlang.org/spec/declaration.html#Typeof");
			return;
		case "oldwiki":
			auto url = "http://prowiki.org/wiki4d/wiki.cgi";
			if(parts.length > 1)
				url ~= "?formpage=Search&id=Search&search=" ~ std.uri.
					encodeComponent(join(parts[1..$], " "));
			cgi.setResponseLocation(url);
			return;
		default:
			// just continue
			version(vps) { } else {
			/+
			if(std.file.exists("/var/www/dpldocs.info/experimental-docs/" ~ searchTerm ~ ".1.html")) {
				cgi.setResponseLocation("/experimental-docs/" ~ searchTerm ~ ".1.html");
				return;
			}
			if(std.file.exists("/var/www/dpldocs.info/experimental-docs/" ~ searchTerm ~ ".html")) {
				cgi.setResponseLocation("/experimental-docs/" ~ searchTerm ~ ".html");
				return;
			}
			+/
				// redirect to vps; nothing else to do for this request,
				// so do not run the search or write a body
				if("local" !in cgi.get) {
					cgi.setResponseLocation("//search.dpldocs.info/?q=" ~ std.uri.encodeComponent(searchTerm));
					return;
				}
			}
	}


	Magic[] magic = getPossibilities(search, cgi.request("project"));

	sort!((a, b) => a.score > b.score)(magic);

	// adjustments based on previously showing results
	// (applied after sorting; the list is not re-sorted, the adjusted
	// score only shows up in the data-score attribute below)
	{
		bool[int] alreadyPresent;
		foreach(ref item; magic) {
			auto decl = item.decl;
			if(decl.parent in alreadyPresent)
				item.score -= 8;
			alreadyPresent[decl.id] = true;
		}
	}

	auto document = new Document();
	version(vps) {
		import std.file;
		document.parseUtf8(readText("/dpldocs-build/skeleton.html"), true, true);
		document.title = "Dub Documentation Search";
	} else
		document.parseUtf8(import("skeleton.html"), true, true);
	// NOTE(review): this runs in every build and so replaces the vps title
	// set just above - confirm that is intended.
	document.title = "Search Results";

	auto form = document.requireElementById!Form("search");
	form.setValue("searchTerm", search);

	version(vps) {
		// intentionally blank
	} else {
		// the skeleton's assets live under /experimental-docs/ on this host
		auto l = document.requireSelector("link");
		l.href = "/experimental-docs/" ~ l.href;
		l = document.requireSelector("script[src]");
		l.src = "/experimental-docs/" ~ l.src;
	}

	auto pc = document.requireSelector("#page-content");
	pc.addChild("h1", "Search Results");
	auto ml = pc.addChild("dl");
	ml.className = "member-list";

	// Joins the names of a symbol and its ancestors with dots, stopping at a
	// module, at a symbol without parent, or when a parent cannot be loaded.
	string getFqn(DeclElement i) {
		string n;
		while(true) {
			if(n) n = "." ~ n;
			n = i.name ~ n;
			if(i.type == "module")
				break;
			if(i.parent == 0)
				break;
			i = getDecl(i.parent);
			if(i.id == 0)
				break;
		}
		return n;
	}

	// names already listed, so each one is shown only once
	bool[string] alreadyPresent;
	int count = 0;
	foreach(idx, item; magic) {
		auto decl = item.decl;
		if(decl.id == 0) continue; // should never happen
		version(vps)
			auto link = "//"~decl.packageName~".dpldocs.info/" ~ decl.link;
		else
			auto link = "//dpldocs.info/experimental-docs/" ~ decl.link;
		if(decl.link.length && decl.link[0] == '/')
			link = decl.link;
		auto fqn = getFqn(decl);
		if(fqn in alreadyPresent)
			continue;
		alreadyPresent[fqn] = true;
		auto dt = ml.addChild("dt");
		dt.addClass("search-result");
		dt.addChild("span", decl.packageName).addClass("project-name");
		dt.addChild("br");
		// zero-width spaces after the dots let long names wrap
		dt.addChild("a", fqn.replace(".", ".\u200B"), link);
		dt.dataset.score = to!string(item.score);
		auto html = decl.description;
		//auto d = new Document(html);
		//writeln(d.root.innerText.replace("\n", " "));
		//writeln();

		// FIXME fix relative links from here
		ml.addChild("dd", Html(html));
		// NOTE(review): this walks every link in the whole list on each
		// iteration, including the result links of earlier entries.
		foreach(a; ml.querySelectorAll("a[href]")) {
			auto uri = Uri(a.href).basedOn(Uri("//" ~ decl.packageName~".dpldocs.info/"));
			a.href = uri.toString;
		}
		count++;

		if(count >= 20)
			break;
	}

	cgi.write(document.toString, true);
}
468 
469 mixin GenericMain!(searcher);
470