format.writer source code

1 module format.writer;
2 
3 import format.chunk;
4 import format.config;
5 
6 import std.container.rbtree;
7 
8 string write(Chunk[] chunks, Config config) {
9 	auto context = Context(config, null);
10 	auto w = Writer(BlockSpecifier(chunks, LinePrefix(0, 0)), &context);
11 	w.write();
12 	w.buffer ~= '\n';
13 	return w.buffer.data;
14 }
15 
16 package:
17 
18 struct LinePrefix {
19 	uint indent;
20 	uint offset;
21 
22 	size_t toHash() const @safe nothrow {
23 		return indent * 0x9e3779b97f4a7c15 + offset + 0xbf4600628f7c64f5;
24 	}
25 }
26 
27 struct BlockSpecifier {
28 	Chunk[] chunks;
29 	LinePrefix prefix;
30 
31 	this(Chunk[] chunks, LinePrefix prefix) {
32 		this.chunks = chunks;
33 		this.prefix = prefix;
34 	}
35 
36 	bool opEquals(const ref BlockSpecifier rhs) const {
37 		return chunks is rhs.chunks && prefix == rhs.prefix;
38 	}
39 
40 	size_t toHash() const @safe nothrow {
41 		size_t h = cast(size_t) chunks.ptr;
42 
43 		h ^= (h >>> 33);
44 		h *= 0xff51afd7ed558ccd;
45 		h += chunks.length;
46 		h ^= (h >>> 33);
47 		h *= 0xc4ceb9fe1a85ec53;
48 
49 		return h + prefix.toHash();
50 	}
51 }
52 
53 struct FormatResult {
54 	uint cost;
55 	uint overflow;
56 	string text;
57 }
58 
59 struct Context {
60 	Config config;
61 	FormatResult[BlockSpecifier] cache;
62 }
63 
64 struct Writer {
65 	Context* context;
66 	alias context this;
67 
68 	uint cost;
69 	uint overflow;
70 
71 	Chunk[] chunks;
72 	LinePrefix prefix;
73 
74 	import std.array;
75 	Appender!string buffer;
76 
77 	this(BlockSpecifier block, Context* context) in {
78 		assert(context !is null);
79 	} do {
80 		chunks = block.chunks;
81 		prefix = block.prefix;
82 
83 		this.context = context;
84 	}
85 
86 	FormatResult write() {
87 		cost = 0;
88 		overflow = 0;
89 
90 		import std.array;
91 		buffer = appender!string();
92 
93 		size_t start = 0;
94 		foreach (i, ref c; chunks) {
95 			if (i == 0 || !c.startsUnwrappedLine) {
96 				continue;
97 			}
98 
99 			LineWriter(&this, chunks[start .. i]).write();
100 			start = i;
101 		}
102 
103 		// Make sure we write the last line too.
104 		LineWriter(&this, chunks[start .. $]).write();
105 
106 		return FormatResult(cost, overflow, buffer.data);
107 	}
108 
109 	FormatResult formatBlock(Chunk[] chunks, LinePrefix prefix) {
110 		auto block = BlockSpecifier(chunks, prefix);
111 		return cache.require(block, Writer(block, context).write());
112 	}
113 
114 	void output(char c) {
115 		buffer ~= c;
116 	}
117 
118 	void output(string s) {
119 		buffer ~= s;
120 	}
121 
122 	void indent(uint level) {
123 		if (config.useTabs) {
124 			foreach (_; 0 .. level) {
125 				output('\t');
126 			}
127 		} else {
128 			foreach (_; 0 .. level * config.indentationSize) {
129 				output(' ');
130 			}
131 		}
132 	}
133 
134 	void outputOffset(uint columns) {
135 		foreach (_; 0 .. columns) {
136 			output(' ');
137 		}
138 	}
139 }
140 
141 enum MAX_ATTEMPT = 5000;
142 
143 struct LineWriter {
144 	Writer* writer;
145 	alias writer this;
146 
147 	Chunk[] line;
148 
149 	this(Writer* writer, Chunk[] line) in {
150 		assert(line.length > 0, "line must not be empty");
151 	} do {
152 		this.writer = writer;
153 		this.line = line;
154 	}
155 
156 	void write() {
157 		auto state = findBestState();
158 
159 		cost += state.cost;
160 		overflow += state.overflow;
161 
162 		writeLine(state);
163 	}
164 
165 	void writeLine(SolveState state) {
166 		foreach (i, c; line) {
167 			uint lineCount = 0;
168 			if (i > 0 || line.ptr !is chunks.ptr) {
169 				assert(i == 0 || !c.startsUnwrappedLine, "Line splitting bug");
170 				lineCount = state.newLineCount(line, i);
171 			}
172 
173 			foreach (_; 0 .. lineCount) {
174 				output('\n');
175 			}
176 
177 			if ((i == 0 || lineCount > 0) && !c.glued) {
178 				auto prefix = state.getLinePrefix(line, i);
179 				indent(prefix.indent);
180 				outputOffset(prefix.offset);
181 			}
182 
183 			if (lineCount == 0 && c.separator == Separator.Space) {
184 				output(' ');
185 			}
186 
187 			final switch (c.kind) with (ChunkKind) {
188 				case Text:
189 					output(c.text);
190 					break;
191 
192 				case Block:
193 					auto f =
194 						formatBlock(c.chunks, state.getLinePrefix(line, i));
195 
196 					cost += f.cost;
197 					overflow += f.overflow;
198 
199 					output(f.text);
200 					break;
201 			}
202 		}
203 	}
204 
205 	SolveState findBestState() {
206 		auto best = SolveState(this);
207 		if (best.overflow == 0 || !best.canExpand) {
208 			// Either the line already fit, or it is not breakable.
209 			return best;
210 		}
211 
212 		static precisePred(const SolveState a, const SolveState b) {
213 			return a < b;
214 		}
215 
216 		auto checkpoints = CheckPoints(&this);
217 		return exploreStates!precisePred(best, checkpoints, MAX_ATTEMPT);
218 	}
219 
220 	SolveState exploreStates(
221 		alias pred
222 	)(SolveState best, ref CheckPoints checkpoints, uint max_attempts) {
223 		uint attempts = 0;
224 		scope queue = redBlackTree!pred(best);
225 
226 		// Once we have a solution that fits, or no more things
227 		// to try, then we are done.
228 		while (!queue.empty) {
229 			auto next = queue.front;
230 
231 			// We found the lowest cost solution that fit on the page.
232 			if (next.overflow == 0 || attempts > max_attempts) {
233 				break;
234 			}
235 
236 			// We pop from the queue *AFTER* checking for termination condition
237 			// so that we do nto lose that state for the next iterration.
238 			queue.removeFront();
239 
240 			// We reach a dead subtree, there is no point erxploring further.
241 			if (next.isDeadSubTree(best)) {
242 				break;
243 			}
244 
245 			// There is no point trying to expand this if it cannot
246 			// lead to a a solution better than the current best.
247 			if (checkpoints.isRedundant(next, false)) {
248 				continue;
249 			}
250 
251 			// This algorithm is exponential in nature, so make sure to stop
252 			// after some time, even if we haven't found an optimal solution.
253 			attempts++;
254 
255 			bool split = false;
256 			foreach (rule; next.ruleValues.frozen .. line.length) {
257 				if (split) {
258 					// We reach a split point, bail.
259 					break;
260 				}
261 
262 				split = next.isSplit(rule);
263 				if (!split && !next.canSplit(line, rule)) {
264 					// If we cannot split here, move on.
265 					continue;
266 				}
267 
268 				auto newRuleValues = next.ruleValues.withFrozenSplit(rule);
269 				auto candidate = SolveState(this, newRuleValues);
270 
271 				if (candidate.isBetterThan(best)) {
272 					best = candidate;
273 				}
274 
275 				// We check for redundant path first so that, even in the
276 				// case where this candidate is rulled out, it can serve
277 				// as a checkpoint.
278 				if (checkpoints.isRedundant(candidate, true)) {
279 					continue;
280 				}
281 
282 				// This candidate can never expand to something better than the best.
283 				if (candidate.isDeadSubTree(best)) {
284 					continue;
285 				}
286 
287 				queue.insert(candidate);
288 			}
289 		}
290 
291 		return best;
292 	}
293 }
294 
295 struct CheckPoints {
296 	LineWriter* lineWriter;
297 	SolveState[ulong][] paths;
298 
299 	this(LineWriter* lineWriter) {
300 		this.lineWriter = lineWriter;
301 		this.paths.length = lineWriter.line.length;
302 	}
303 
304 	import format.span;
305 	static getSpanStateHash(const ref SolveState s, const Span span, ulong h) {
306 		if (span is null) {
307 			h ^= (h >>> 33);
308 			h *= 0x4cd6944c5cc20b6d;
309 			h ^= (h >>> 33);
310 			h *= 0xfc12c5b19d3259e9;
311 
312 			return h;
313 		}
314 
315 		h ^= (h >>> 33);
316 		h *= 0x7fb5d329728ea185;
317 		h ^= (h >>> 33);
318 		h *= 0x81dadef4bc2dd44d;
319 
320 		h += span.getState(s);
321 
322 		h ^= (h >>> 33);
323 		h *= 0x99bcf6822b23ca35;
324 		h ^= (h >>> 33);
325 		h *= 0x14020a57acced8b7;
326 
327 		h += s.isUsed(span);
328 
329 		return getSpanStateHash(s, span.parent, h);
330 	}
331 
332 	ulong getPathHash(const ref SolveState s, size_t i) {
333 		const prefix = s.getLinePrefix(lineWriter.line, i);
334 		return getSpanStateHash(s, lineWriter.line[i].span, prefix.toHash());
335 	}
336 
337 	static isSameSpanState(const ref SolveState a, const ref SolveState b,
338 	                       const Span span) {
339 		if (span is null) {
340 			return true;
341 		}
342 
343 		if (a.isUsed(span) != b.isUsed(span)) {
344 			return false;
345 		}
346 
347 		if (span.getState(a) != span.getState(b)) {
348 			return false;
349 		}
350 
351 		return isSameSpanState(a, b, span.parent);
352 	}
353 
354 	bool isSamePath(const ref SolveState a, const ref SolveState b, size_t i) {
355 		if (a == b) {
356 			return false;
357 		}
358 
359 		if (a.getLinePrefix(lineWriter.line, i)
360 			    != b.getLinePrefix(lineWriter.line, i)) {
361 			return false;
362 		}
363 
364 		return isSameSpanState(a, b, lineWriter.line[i].span);
365 	}
366 
367 	bool isRedundant(const ref SolveState s, bool isRedundantWithSelf) {
368 		if (!s.canExpand || s.ruleValues.frozen >= lineWriter.line.length) {
369 			// There is nothing more to explore down this path.
370 			return true;
371 		}
372 
373 		const i = s.ruleValues.frozen - 1;
374 		const h = getPathHash(s, i);
375 
376 		const c = h in paths[i];
377 		const cmp = s.opCmp(c);
378 		if (cmp == 0) {
379 			// A state is always redundant with itself.
380 			return isRedundantWithSelf;
381 		}
382 
383 		if (cmp < 0) {
384 			// We have a new best on that specific path.
385 			paths[i][h] = cast() s;
386 			return false;
387 		}
388 
389 		// We are on a path that is worse than the checkpoint.
390 		// If this isn't a collision, this path is redundant.
391 		return isSamePath(s, *c, i);
392 	}
393 }
394 
395 struct SolveState {
396 	uint cost = 0;
397 	uint overflow = 0;
398 	uint sunk = 0;
399 
400 	LinePrefix prefix;
401 
402 	import format.rulevalues;
403 	RuleValues ruleValues;
404 
405 	import format.span, std.bitmanip;
406 	mixin(taggedClassRef!(
407 		// sdfmt off
408 		// Spans that require indentation.
409 		RedBlackTree!(const Span), "usedSpans",
410 		bool, "canExpand", 1,
411 		// sdfmt on
412 	));
413 
414 	this(ref LineWriter lineWriter) {
415 		this(lineWriter, RuleValues(1, lineWriter.line.length));
416 	}
417 
418 	this(ref LineWriter lineWriter, RuleValues ruleValues) {
419 		this.ruleValues = ruleValues;
420 		this.prefix = lineWriter.prefix;
421 
422 		computeCost(lineWriter.line, lineWriter.writer);
423 	}
424 
425 	void computeCost(Chunk[] line, Writer* writer) {
426 		sunk = 0;
427 		overflow = 0;
428 		cost = 0;
429 
430 		// If there is nothing to be done, just skip.
431 		if (line.length == 0) {
432 			return;
433 		}
434 
435 		// Preparatory work.
436 		computeUsedSpans(line);
437 
438 		// All the span which do not fit on one line.
439 		RedBlackTree!Span brokenSpans;
440 
441 		uint length = 0;
442 		uint previousColumn = 0;
443 		Span previousSpan = null;
444 		size_t regionStart = 0;
445 
446 		const indentationSize = writer.config.indentationSize;
447 		const pageWidth = writer.config.pageWidth;
448 
449 		foreach (i, ref c; line) {
450 			uint lineLength = 0;
451 			uint column = 0;
452 
453 			if (c.startsRegion) {
454 				regionStart = i;
455 			}
456 
457 			if (c.kind == ChunkKind.Block) {
458 				auto f = writer.formatBlock(c.chunks, getLinePrefix(line, i));
459 
460 				// Compute the column at which the block starts.
461 				auto text = f.text;
462 
463 				foreach (n; 0 .. f.text.length) {
464 					if (text[n] == ' ') {
465 						column++;
466 						continue;
467 					}
468 
469 					if (text[n] == '\t') {
470 						column += indentationSize;
471 						continue;
472 					}
473 
474 					break;
475 				}
476 
477 				cost += f.cost;
478 				overflow += f.overflow;
479 				updateSunk(line, i);
480 			} else {
481 				if (newLineCount(line, i) == 0) {
482 					length += (c.separator == Separator.Space) + c.length;
483 					continue;
484 				}
485 
486 				column = 0;
487 				lineLength = c.length;
488 
489 				if (!c.glued) {
490 					const prefix = getLinePrefix(line, i);
491 					column = prefix.indent * indentationSize + prefix.offset;
492 				}
493 			}
494 
495 			if (i > 0) {
496 				// Try to avoid subsequent line to have the same indentation
497 				// level if they belong to a different span.
498 				uint penality = computeNewLinePenality(
499 					c, column, length, previousColumn, previousSpan);
500 
501 				// End the previous line if there is one.
502 				endLine(line, i, length, pageWidth, penality);
503 			}
504 
505 			previousSpan = c.span;
506 			previousColumn = column;
507 			length = column + lineLength;
508 
509 			if (c.continuation) {
510 				continue;
511 			}
512 
513 			cost += 1;
514 
515 			auto span = c.span;
516 			bool needInsert = true;
517 
518 			// Make sure to keep track of the span that cross over line breaks.
519 			while (span !is null && needInsert) {
520 				scope(success) span = span.parent;
521 
522 				if (brokenSpans is null) {
523 					brokenSpans = redBlackTree!Span();
524 				}
525 
526 				needInsert = brokenSpans.insert(span) > 0;
527 			}
528 		}
529 
530 		endLine(line, line.length, length, pageWidth);
531 
532 		// Account for the cost of breaking spans.
533 		if (brokenSpans !is null) {
534 			foreach (s; brokenSpans) {
535 				cost += s.getCost(this);
536 			}
537 		}
538 	}
539 
540 	void computeUsedSpans(Chunk[] line) {
541 		auto freezePoint = ruleValues.frozen;
542 		ruleValues.frozen = 1;
543 
544 		scope(success) {
545 			ruleValues.frozen = freezePoint;
546 		}
547 
548 		foreach (i, ref c; line) {
549 			// Continuation are not considered line splits.
550 			if (c.continuation) {
551 				continue;
552 			}
553 
554 			if (!isSplit(i)) {
555 				if (!mustSplit(line, i)) {
556 					continue;
557 				}
558 
559 				// Mark this as split.
560 				ruleValues[i] = true;
561 			}
562 
563 			// If there are no spans to break, move on.
564 			if (c.span is null) {
565 				continue;
566 			}
567 
568 			if (usedSpans is null) {
569 				usedSpans = redBlackTree!(const(Span))();
570 			}
571 
572 			usedSpans.insert(c.span);
573 		}
574 	}
575 
576 	uint computeNewLinePenality(
577 		const ref Chunk c,
578 		uint column,
579 		uint length,
580 		uint previousColumn,
581 		const(Span) previousSpan
582 	) const {
583 		// No penality for top level.
584 		if (column == 0) {
585 			return 0;
586 		}
587 
588 		// Avoid line break that make the next lien starts past the previous line.
589 		if (column >= length && c.kind != ChunkKind.Block) {
590 			return 1;
591 		}
592 
593 		// No penality for mismatching levels.
594 		if (column != previousColumn) {
595 			return 0;
596 		}
597 
598 		// No penality for double line breaks.
599 		if (c.separator == Separator.TwoNewLines) {
600 			return 0;
601 		}
602 
603 		// If both spans are expected to be on the same level,
604 		// then it's all good.
605 		if (previousSpan.isSameLevel(c.span)) {
606 			return 0;
607 		}
608 
609 		// This new line is at the same level as the previous line, yet belong to another span.
610 		// This tends to make the code confusing to read, so we penalize this solution.
611 		return 1;
612 	}
613 
614 	void updateSunk(const Chunk[] line, size_t i) {
615 		if (canExpand) {
616 			return;
617 		}
618 
619 		foreach (j; ruleValues.frozen .. i) {
620 			if (canSplit(line, j)) {
621 				canExpand = true;
622 				return;
623 			}
624 		}
625 
626 		// If the line overflow, but has no split point, it is sunk.
627 		sunk = overflow;
628 	}
629 
630 	void endLine(const Chunk[] line, size_t i, uint length, uint pageWidth,
631 	             uint penality = 0) {
632 		if (length > pageWidth) {
633 			penality += length - pageWidth;
634 		}
635 
636 		if (penality > 0) {
637 			overflow += penality;
638 			updateSunk(line, i);
639 		}
640 	}
641 
642 	bool canSplit(const Chunk[] line, size_t i) const {
643 		if (isSplit(i)) {
644 			return false;
645 		}
646 
647 		auto c = line[i];
648 		if (!c.canSplit()) {
649 			return false;
650 		}
651 
652 		return c.span.canSplit(this, i);
653 	}
654 
655 	bool mustSplit(const Chunk[] line, size_t i) const {
656 		auto c = line[i];
657 		return c.newLineCount() > 0 || c.span.mustSplit(this, i);
658 	}
659 
660 	bool isSplit(size_t i) const {
661 		return ruleValues[i];
662 	}
663 
664 	bool isSplit(size_t from, size_t to) const {
665 		foreach (i; from .. to) {
666 			if (isSplit(i)) {
667 				return true;
668 			}
669 		}
670 
671 		return false;
672 	}
673 
674 	uint newLineCount(const Chunk[] line, size_t i) const {
675 		if (auto c = line[i].newLineCount()) {
676 			return c;
677 		}
678 
679 		return isSplit(i);
680 	}
681 
682 	bool isUsed(const Span span) const {
683 		return usedSpans !is null && span in usedSpans;
684 	}
685 
686 	LinePrefix getLinePrefix(Chunk[] line, size_t i) const {
687 		uint offset = 0;
688 		const c = line[i];
689 
690 		// Find the preceding line break.
691 		size_t r = c.span.getAlignIndex(this, i);
692 		while (r > 0 && newLineCount(line, r) == 0) {
693 			offset += line[r].separator == Separator.Space;
694 			offset += line[--r].length;
695 		}
696 
697 		if (line[r].glued) {
698 			return LinePrefix(0, offset);
699 		}
700 
701 		if (r == 0 || r == i) {
702 			// We don't need to do any alignement magic.
703 			const indent = c.indentation + c.span.getIndent(this, i);
704 			return LinePrefix(prefix.indent + indent, prefix.offset + offset);
705 		}
706 
707 		const base = getLinePrefix(line, r);
708 		const indent = line[i].span.getExtraIndent(line[r].span, this, i);
709 
710 		return LinePrefix(base.indent + indent, base.offset + offset);
711 	}
712 
713 	// Return if this solve state must be chosen over rhs as a solution.
714 	bool isDeadSubTree(const ref SolveState best) const {
715 		if (sunk > best.overflow) {
716 			// We already have comitted to an overflow greater than the best.
717 			return true;
718 		}
719 
720 		if (sunk == best.overflow && cost >= best.cost) {
721 			// We already comitted to a cost greater than the best.
722 			return true;
723 		}
724 
725 		// There is still hope to find a better solution down that path.
726 		return false;
727 	}
728 
729 	// Return if this solve state must be chosen over rhs as a solution.
730 	bool isBetterThan(const ref SolveState rhs) const {
731 		if (overflow != rhs.overflow) {
732 			return overflow < rhs.overflow;
733 		}
734 
735 		if (cost != rhs.cost) {
736 			return cost < rhs.cost;
737 		}
738 
739 		return ruleValues < rhs.ruleValues;
740 	}
741 
742 	// lhs < rhs => lhs.opCmp(rhs) < 0
743 	int opCmp(const ref SolveState rhs) const {
744 		if (sunk != rhs.sunk) {
745 			return sunk - rhs.sunk;
746 		}
747 
748 		if (cost != rhs.cost) {
749 			return cost - rhs.cost;
750 		}
751 
752 		if (overflow != rhs.overflow) {
753 			return overflow - rhs.overflow;
754 		}
755 
756 		return ruleValues.opCmp(rhs.ruleValues);
757 	}
758 
759 	int opCmp(const SolveState* rhs) const {
760 		if (rhs is null) {
761 			return -1;
762 		}
763 
764 		return this.opCmp(*rhs);
765 	}
766 }