1 // FIXME: add +proj and -proj to adjust project results
2 
3 // dmdi -g -debug -version=vps locate stemmer.d -oflocate_vps -version=scgi
4 
5 // my local config assumes this will be on port 9653
6 
7 module adrdox.locate;
8 
9 import arsd.postgres;
10 
11 import ps = PorterStemmer;
12 import arsd.cgi;
13 import arsd.dom;
14 import std.stdio;
15 import std.file;
16 import std.conv : to;
17 import std.algorithm : sort;
18 import std.string : toLower, replace, split;
19 
/// Cached connection handle; stays null until db() is first called.
PostgreSql db_;

/// Returns the cached connection, opening it with "dbname=adrdox" on first use.
PostgreSql db() {
	if(db_ !is null)
		return db_;

	db_ = new PostgreSql("dbname=adrdox");
	return db_;
}
28 
/// Looks up `term` as a tag in auto_generated_tags and returns at most 15
/// (symbol id, score) pairs, ordered by score descending.
TermElement[] resultsByTerm(string term) {
	TermElement[] found;

	auto rows = db.query("SELECT d_symbols_id, score FROM auto_generated_tags WHERE tag = ? ORDER BY score DESC LIMIT 15", term);
	foreach(row; rows) {
		auto symbolId = to!int(row[0]);
		auto tagScore = to!int(row[1]);
		found ~= TermElement(symbolId, tagScore);
	}

	return found;
}
35 
/// Fetches the symbol with id `i`, restricted to rows where is_latest is true,
/// together with the url_name of the dub package it belongs to.
///
/// Returns: the populated DeclElement, or DeclElement.init (id == 0) when the
/// query yields no row.
DeclElement getDecl(int i) {
	auto rows = db.query("
		SELECT
			d_symbols.*,
			dub_package.url_name AS package_subdomain
		FROM
			d_symbols
		INNER JOIN
			package_version ON package_version.id = d_symbols.package_version_id
		INNER JOIN
			dub_package ON dub_package.id = package_version.dub_package_id
		WHERE
			d_symbols.id = ?
			AND
			is_latest = true
		", i);

	// only the first row matters; the id lookup is expected to yield at most one
	foreach(row; rows) {
		string fqn = row["fully_qualified_name"];
		string summary = row["summary"];
		string urlName = row["url_name"];
		int symbolId = row["id"].to!int;
		string subdomain = row["package_subdomain"];

		// type and parent are not read from the row: they are always "" and 0
		return DeclElement(fqn, summary, urlName, symbolId, "", 0, subdomain);
	}

	return DeclElement.init;
}
56 
/// One tag-table hit: which symbol matched and how strongly.
struct TermElement {
	/// id of the matching d_symbols row
	int declId;
	/// score stored with the tag
	int score;
}
61 
/// A documented symbol as used by the search code. Field order matters:
/// instances are built positionally.
struct DeclElement {
	/// symbol name (getDecl stores the fully qualified name here)
	string name;
	/// summary text; actually HTML
	string description;
	/// link target for the symbol's documentation page
	string link;
	/// d_symbols id; 0 means "no such symbol"
	int id;
	/// kind of symbol; the search code compares it against "module"
	string type;
	/// id of the enclosing symbol, 0 when there is none
	int parent;
	/// dub package url name, used as the documentation subdomain
	string packageName;
}
71 
/// A ranked search candidate: the symbol id, its final score, and the
/// symbol details fetched for it.
struct Magic {
	/// id of the candidate symbol
	int declId;
	/// combined relevance score; higher sorts first
	int score;
	/// details looked up for declId
	DeclElement decl;
}
77 
/// Computes the score bonus a symbol earns from the package it lives in.
///
/// Params:
///   details = the symbol being scored; only packageName is consulted
///   preferredProject = package the user asked to favour, or empty for none
///
/// Returns: 150 if the package is the preferred one, plus 50 for phobos or
/// druntime, plus 30 for arsd-official.
int getProjectAdjustment(DeclElement details, string preferredProject) {
	auto pkg = details.packageName;

	int bonus = 0;
	if(preferredProject.length && pkg == preferredProject)
		bonus = 150;

	switch(pkg) {
		case "phobos":
		case "druntime":
			bonus += 50;
			break;
		case "arsd-official":
			bonus += 30;
			break;
		default:
			break;
	}

	return bonus;
}
91 
/++
	Builds the unsorted list of scored candidates for a search string.

	The search is split on single spaces. Each term is looked up in the tag
	table as written, lowercased, and stemmed; hits are accumulated per symbol.
	A symbol's score is halved once for every term that did not hit it, then
	the package bonus from getProjectAdjustment is added. If nothing matched at
	all, a fuzzy fallback compares each term against the symbol names sharing
	its first character and keeps those within edit distance 2.

	Params:
		search = the raw query text
		preferredProject = package name to favour, or empty for none

	Returns: the candidates, in no particular order.
+/
Magic[] getPossibilities(string search, string preferredProject) {
	// declHits is a bitmask stored in an int, one bit per term. Capping the
	// number of terms keeps `1 << idx` inside the int and also bounds the
	// number of queries a single request can trigger.
	enum maxTerms = 31;

	int[int] declScores;

	int[int] declHits;

	ps.PorterStemmer s;

	// Drop empty terms (leading, trailing or doubled spaces) while keeping the
	// remaining terms in their original order; addHit treats index 0 specially,
	// so the order affects scoring.
	string[] terms;
	foreach(piece; search.split(" ")) {// ~ search.split(".");
		if(piece.length == 0)
			continue;
		terms ~= piece;
		if(terms.length == maxTerms)
			break;
	}

	void addHit(TermElement item, size_t idx) {
		if(idx == 0) {
			declScores[item.declId] += item.score;
			return;
		}
		if(item.declId in declScores) {
			declScores[item.declId] += 25; // hit both terms
			declScores[item.declId] += item.score;
		} else {
			// only hit one term...
			declScores[item.declId] += item.score / 2;
		}
	}

	// On each term, we want to check for exact match and fuzzy match / natural language match.
	// FIXME: if something matches both it should be really strong. see time_t vs "time_t std.datetime"
	foreach(idx, term; terms) {
		assert(term.length > 0);

		immutable bit = 1 << cast(int) idx;

		// scores every tag hit for one spelling of the term and marks the term as matched
		void record(string tag) {
			foreach(item; resultsByTerm(tag)) {
				addHit(item, idx);
				declHits[item.declId] |= bit;
			}
		}

		record(term);

		auto l = term.toLower;
		if(l != term)
			record(l);

		auto st = s.stem(l).idup;
		if(st != l)
			record(st);
	}

	Magic[] magic;

	foreach(decl, score; declScores) {
		auto hits = declHits[decl];
		// halve the score once per term that never hit this symbol
		foreach(idx; 0 .. terms.length) {
			if(!(hits & (1 << cast(int) idx)))
				score /= 2;
		}
		auto details = getDecl(decl);
		int projectAdjustment = getProjectAdjustment(details, preferredProject);
		magic ~= Magic(decl, score + projectAdjustment, details);
	}

	if(magic.length == 0) {
		import std.algorithm : levenshteinDistance;
		import std.utf : stride;

		// nothing matched a tag: fall back to near-miss spellings of symbol names
		foreach(term; terms) {
			term = term.toLower();
			// take the whole first code point, not just its first byte, so the
			// LIKE pattern is never a truncated UTF-8 sequence
			auto prefix = term[0 .. stride(term, 0)];
			foreach(row; db.query("SELECT id, fully_qualified_name FROM d_symbols WHERE fully_qualified_name LIKE ?", prefix ~ "%")) {
				string name = row[1];
				int id = row[0].to!int;
				name = name.toLower;
				auto dist = cast(int) levenshteinDistance(name, term);
				if(dist <= 2) {
					auto details = getDecl(id);
					int projectAdjustment = getProjectAdjustment(details, preferredProject);
					magic ~= Magic(id, projectAdjustment + (3 - dist), details);
				}
			}
		}
	}

	// boosts based on topography
	// NOTE(review): getDecl as written leaves type empty and parent 0, so these
	// boosts only take effect once it fills those fields in.
	foreach(ref item; magic) {
		auto decl = item.decl;
		if(decl.type == "module") {
			// if it is a module, give it moar points
			item.score += 8;
			continue;
		}
		// a direct child of a module gets a smaller boost; look at the parent,
		// not at the symbol itself, and skip the query when there is no parent
		if(decl.parent != 0 && getDecl(decl.parent).type == "module") {
			item.score += 5;
		}
	}

	return magic;
}
193 
194 import std.uri;
195 
/++
	CGI entry point. Depending on the request it serves a static front-end
	file (vps build only), redirects for one of the keyword shortcuts, or
	renders the search results page.

	The query is read with cgi.request from "q", falling back to "searchTerm",
	falling back to the raw query string. If that is empty, the request path
	is used instead (only the vps build fills the path in).

	Params:
		cgi = request/response object for the current request
+/
void searcher(Cgi cgi) {

	auto search = cgi.request("q", cgi.request("searchTerm", cgi.queryString));

	// Declared outside the version block: the code below reads it in every
	// build, but only the vps build assigns it.
	string path;

	version(vps) {
		path = cgi.requestUri;

		// strip the query string
		auto q = path.indexOf("?");
		if(q != -1) {
			path = path[0 .. q];
		}

		// strip the leading slash
		if(path.length && path[0] == '/')
			path = path[1 .. $];

		if(path.length == 0 && search.length == 0) {
			import std.file;

			cgi.write(std.file.read("/dpldocs-build/search-home.html"), true);
			return;
		}

		if(path == "script.js") {
			import std.file;
			cgi.setResponseContentType("text/javascript");
			cgi.write(std.file.read("/dpldocs-build/script.js"), true);
			return;
		}

		if(path == "style.css") {
			import std.file;
			cgi.setResponseContentType("text/css");
			cgi.write(std.file.read("/dpldocs-build/style.css"), true);
			return;
		}
	}

	alias searchTerm = search;

	if(search.length == 0 && path.length)
		search = path;

	if(search.length == 0) {
		cgi.setResponseLocation("/");
		return;
	}

	// keyword shortcuts: the first word selects a redirect target
	auto parts = search.split(" ");
	switch(parts[0].toLower()) {
		case "auto-ref-return-function-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#auto-ref-functions");
			return;
		case "auto-function-return-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#auto-functions");
			return;
		case "ref-function-return-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#ref-functions");
			return;
		case "bugzilla":
			auto url = "http://d.puremagic.com/issues/";
			if(parts.length > 1)
				url ~= "show_bug.cgi?id=" ~ parts[1];
			cgi.setResponseLocation(url);
			return;
		case "dip":
			auto url = "http://wiki.dlang.org/DIPs";
			if(parts.length > 1)
				url = "http://wiki.dlang.org/DIP" ~ parts[1];
			cgi.setResponseLocation(url);
			return;
		case "wiki":
			auto url = "http://wiki.dlang.org/";
			if(parts.length > 1) {
				// the remaining words form the query: joined with single
				// spaces and passed as a real query string
				auto wikiQuery = std.uri.encodeComponent(join(parts[1..$], " "));
				url ~= "?search=" ~ wikiQuery ~ "&go=Go&title=Special%3ASearch";
			}
			cgi.setResponseLocation(url);
			return;
		case "faqs":
		case "faq":
			cgi.setResponseLocation("http://wiki.dlang.org/FAQs");
			return;
		case "template-alias-parameter":
			cgi.setResponseLocation("https://dlang.org/spec/template.html#aliasparameters");
			return;
		case "is-expression":
			cgi.setResponseLocation("https://dlang.org/spec/expression.html#IsExpression");
			return;
		case "typeof-expression":
			cgi.setResponseLocation("https://dlang.org/spec/declaration.html#Typeof");
			return;
		case "oldwiki":
			auto url = "http://prowiki.org/wiki4d/wiki.cgi";
			if(parts.length > 1)
				url ~= "?formpage=Search&id=Search&search=" ~ std.uri.
					encodeComponent(join(parts[1..$], " "));
			cgi.setResponseLocation(url);
			return;
		default:
			// just continue
			version(vps) { } else {
				// The term comes straight from the request; do not let ".."
				// turn the existence probe into a walk outside the docs directory.
				if(searchTerm.indexOf("..") == -1) {
					if(std.file.exists("/var/www/dpldocs.info/experimental-docs/" ~ searchTerm ~ ".1.html")) {
						cgi.setResponseLocation("/experimental-docs/" ~ searchTerm ~ ".1.html");
						return;
					}
					if(std.file.exists("/var/www/dpldocs.info/experimental-docs/" ~ searchTerm ~ ".html")) {
						cgi.setResponseLocation("/experimental-docs/" ~ searchTerm ~ ".html");
						return;
					}
				}
				// redirect to vps unless the request asks for a local search
				if("local" !in cgi.get) {
					cgi.setResponseLocation("//search.dpldocs.info/?q=" ~ std.uri.encodeComponent(searchTerm));
					// the redirect is the whole response; skip the search below
					return;
				}
			}
	}


	Magic[] magic = getPossibilities(search, cgi.request("project"));

	sort!((a, b) => a.score > b.score)(magic);

	// adjustments based on previously showing results
	// NOTE(review): the list is not re-sorted afterwards, so this only changes
	// the score value written to the page, not the order.
	{
		bool[int] alreadyPresent;
		foreach(ref item; magic) {
			auto decl = item.decl;
			if(decl.parent in alreadyPresent)
				item.score -= 8;
			alreadyPresent[decl.id] = true;
		}
	}

	auto document = new Document();
	version(vps) {
		import std.file;
		document.parseUtf8(readText("/dpldocs-build/skeleton.html"), true, true);
		// NOTE(review): overwritten by the assignment after this version block
		document.title = "Dub Documentation Search";
	} else {
		document.parseUtf8(import("skeleton.html"), true, true);
	}
	document.title = "Search Results";

	auto form = document.requireElementById!Form("search");
	form.setValue("searchTerm", search);

	version(vps) {
		// intentionally blank
	} else {
		// the skeleton's asset links are relative; point them at the docs directory
		auto l = document.requireSelector("link");
		l.href = "/experimental-docs/" ~ l.href;
		l = document.requireSelector("script[src]");
		l.src = "/experimental-docs/" ~ l.src;
	}

	auto pc = document.requireSelector("#page-content");
	pc.addChild("h1", "Search Results");
	auto ml = pc.addChild("dl");
	ml.className = "member-list";

	// Builds a dotted name by walking up the parent chain until a module,
	// a symbol without a parent, or a failed lookup is reached.
	string getFqn(DeclElement i) {
		string n;
		while(true) {
			if(n.length) n = "." ~ n;
			n = i.name ~ n;
			if(i.type == "module")
				break;
			if(i.parent == 0)
				break;
			i = getDecl(i.parent);
			if(i.id == 0)
				break;
		}
		return n;
	}

	// emit at most 20 results, skipping repeats of the same dotted name
	bool[string] alreadyPresent;
	int count = 0;
	foreach(item; magic) {
		auto decl = item.decl;
		if(decl.id == 0) continue; // should never happen
		version(vps)
			auto link = "//"~decl.packageName~".dpldocs.info/" ~ decl.link;
		else
			auto link = "//dpldocs.info/experimental-docs/" ~ decl.link;
		// a stored link that is already absolute is used as-is
		if(decl.link.length && decl.link[0] == '/')
			link = decl.link;
		auto fqn = getFqn(decl);
		if(fqn in alreadyPresent)
			continue;
		alreadyPresent[fqn] = true;
		auto dt = ml.addChild("dt");
		dt.addClass("search-result");
		dt.addChild("span", decl.packageName).addClass("project-name");
		dt.addChild("br");
		// zero-width spaces after the dots let long names wrap
		dt.addChild("a", fqn.replace(".", ".\u200B"), link);
		dt.dataset.score = to!string(item.score);

		// FIXME fix relative links from here
		ml.addChild("dd", Html(decl.description));
		count++;

		if(count >= 20)
			break;
	}

	cgi.write(document.toString, true);
}
405 
// Generates the program entry point, dispatching each request to searcher.
mixin GenericMain!searcher;
407