1// Copyright 2019 The Go Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package span
16
17import (
18	"fmt"
19	"unicode/utf16"
20	"unicode/utf8"
21)
22
23// ToUTF16Column calculates the utf16 column expressed by the point given the
24// supplied file contents.
25// This is used to convert from the native (always in bytes) column
26// representation and the utf16 counts used by some editors.
27func ToUTF16Column(p Point, content []byte) (int, error) {
28	if content == nil {
29		return -1, fmt.Errorf("ToUTF16Column: missing content")
30	}
31	if !p.HasPosition() {
32		return -1, fmt.Errorf("ToUTF16Column: point is missing position")
33	}
34	if !p.HasOffset() {
35		return -1, fmt.Errorf("ToUTF16Column: point is missing offset")
36	}
37	offset := p.Offset()      // 0-based
38	colZero := p.Column() - 1 // 0-based
39	if colZero == 0 {
40		// 0-based column 0, so it must be chr 1
41		return 1, nil
42	} else if colZero < 0 {
43		return -1, fmt.Errorf("ToUTF16Column: column is invalid (%v)", colZero)
44	}
45	// work out the offset at the start of the line using the column
46	lineOffset := offset - colZero
47	if lineOffset < 0 || offset > len(content) {
48		return -1, fmt.Errorf("ToUTF16Column: offsets %v-%v outside file contents (%v)", lineOffset, offset, len(content))
49	}
50	// Use the offset to pick out the line start.
51	// This cannot panic: offset > len(content) and lineOffset < offset.
52	start := content[lineOffset:]
53
54	// Now, truncate down to the supplied column.
55	start = start[:colZero]
56
57	// and count the number of utf16 characters
58	// in theory we could do this by hand more efficiently...
59	return len(utf16.Encode([]rune(string(start)))) + 1, nil
60}
61
62// FromUTF16Column advances the point by the utf16 character offset given the
63// supplied line contents.
64// This is used to convert from the utf16 counts used by some editors to the
65// native (always in bytes) column representation.
66func FromUTF16Column(p Point, chr int, content []byte) (Point, error) {
67	if !p.HasOffset() {
68		return Point{}, fmt.Errorf("FromUTF16Column: point is missing offset")
69	}
70	// if chr is 1 then no adjustment needed
71	if chr <= 1 {
72		return p, nil
73	}
74	if p.Offset() >= len(content) {
75		return p, fmt.Errorf("FromUTF16Column: offset (%v) greater than length of content (%v)", p.Offset(), len(content))
76	}
77	remains := content[p.Offset():]
78	// scan forward the specified number of characters
79	for count := 1; count < chr; count++ {
80		if len(remains) <= 0 {
81			return Point{}, fmt.Errorf("FromUTF16Column: chr goes beyond the content")
82		}
83		r, w := utf8.DecodeRune(remains)
84		if r == '\n' {
85			// Per the LSP spec:
86			//
87			// > If the character value is greater than the line length it
88			// > defaults back to the line length.
89			break
90		}
91		remains = remains[w:]
92		if r >= 0x10000 {
93			// a two point rune
94			count++
95			// if we finished in a two point rune, do not advance past the first
96			if count >= chr {
97				break
98			}
99		}
100		p.v.Column += w
101		p.v.Offset += w
102	}
103	return p, nil
104}
105