rust-analyzer

Unnamed repository; edit this file 'description' to name the repository.

Clone

HTTPS

SSH

Auto merge of #14733 - azdavis:master, r=matklad

Make line-index a lib, use nohash_hasher

These seem like they are not specific to rust-analyzer and could be pulled out to their own libraries. So I did.

https://github.com/azdavis/millet/issues/31

Browse Source

bors 2023-05-07

parent a10fd83 · parent 60056b8 · commit 260e996

Diffstat

-rw-r--r--

Cargo.lock

-rw-r--r--

Cargo.toml

-rw-r--r--

crates/hir-ty/Cargo.toml

-rw-r--r--

crates/hir-ty/src/test_db.rs

-rw-r--r--

crates/ide-db/Cargo.toml

-rw-r--r--

crates/ide-db/src/lib.rs

-rw-r--r--

crates/ide-db/src/line_index.rs

317

-rw-r--r--

crates/ide-db/src/search.rs

-rw-r--r--

crates/ide-db/src/source_change.rs

-rw-r--r--

crates/ide-db/src/tests/line_index.rs

-rw-r--r--

crates/ide-ssr/Cargo.toml

-rw-r--r--

crates/ide-ssr/src/lib.rs

-rw-r--r--

crates/ide/Cargo.toml

-rw-r--r--

crates/ide/src/references.rs

-rw-r--r--

crates/rust-analyzer/Cargo.toml

-rw-r--r--

crates/rust-analyzer/src/caps.rs

-rw-r--r--

crates/rust-analyzer/src/diagnostics.rs

-rw-r--r--

crates/rust-analyzer/src/diagnostics/to_proto.rs

-rw-r--r--

crates/rust-analyzer/src/from_proto.rs

-rw-r--r--

crates/rust-analyzer/src/global_state.rs

-rw-r--r--

crates/rust-analyzer/src/to_proto.rs

-rw-r--r--

crates/stdx/src/hash.rs

-rw-r--r--

crates/stdx/src/lib.rs

-rw-r--r--

crates/test-utils/Cargo.toml

-rw-r--r--

crates/text-edit/Cargo.toml

-rw-r--r--

crates/vfs/Cargo.toml

-rw-r--r--

crates/vfs/src/file_set.rs

-rw-r--r--

crates/vfs/src/lib.rs

-rw-r--r--

lib/line-index/Cargo.toml

-rw-r--r--

lib/line-index/src/lib.rs

237

-rw-r--r--

lib/line-index/src/tests.rs

-rw-r--r--

lib/line-index/tests/it.rs

32 files changed, 481 insertions, 474 deletions

diff --git a/Cargo.lock b/Cargo.lock
index f0fe95327f..4b22a58e56 100644
--- a/Cargo.lock
+++ b/Cargo.lock

@@ -587,6 +587,7 @@ dependencies = [

"itertools",

"la-arena",

"limit",

+ "nohash-hasher",

"once_cell",

"profile",

"project-model",

@@ -650,6 +651,7 @@ dependencies = [

"ide-diagnostics",

"ide-ssr",

"itertools",

+ "nohash-hasher",

"oorandom",

"profile",

"pulldown-cmark",

@@ -717,7 +719,9 @@ dependencies = [

"indexmap",

"itertools",

"limit",

+ "line-index",

"memchr",

+ "nohash-hasher",

"once_cell",

"oorandom",

"parser",

@@ -763,6 +767,7 @@ dependencies = [

"hir",

"ide-db",

"itertools",

+ "nohash-hasher",

"parser",

"stdx",

"syntax",

@@ -913,6 +918,14 @@ name = "limit"

version = "0.0.0"

[[package]]

+name = "line-index"

+version = "0.1.0-pre.1"

+dependencies = [

+ "nohash-hasher",

+ "text-size",

+[[package]]

name = "lock_api"

version = "0.4.9"

source = "registry+https://github.com/rust-lang/crates.io-index"

@@ -1055,6 +1068,12 @@ dependencies = [

]

[[package]]

+name = "nohash-hasher"

+version = "0.2.0"

+source = "registry+https://github.com/rust-lang/crates.io-index"

+checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451"

+[[package]]

name = "notify"

version = "5.1.0"

source = "registry+https://github.com/rust-lang/crates.io-index"

@@ -1470,6 +1489,7 @@ dependencies = [

"mbe",

"mimalloc",

"mio",

+ "nohash-hasher",

"num_cpus",

"oorandom",

"parking_lot 0.12.1",

@@ -2050,6 +2070,7 @@ version = "0.0.0"

dependencies = [

"fst",

"indexmap",

+ "nohash-hasher",

"paths",

"rustc-hash",

"stdx",

diff --git a/Cargo.toml b/Cargo.toml
index ef8d8c0eef..c7b0228e33 100644
--- a/Cargo.toml
+++ b/Cargo.toml

@@ -74,10 +74,17 @@ toolchain = { path = "./crates/toolchain", version = "0.0.0" }

tt = { path = "./crates/tt", version = "0.0.0" }

vfs-notify = { path = "./crates/vfs-notify", version = "0.0.0" }

vfs = { path = "./crates/vfs", version = "0.0.0" }

+line-index = { version = "0.1.0-pre.1", path = "./lib/line-index" }

# non-local crates

-smallvec = { version = "1.10.0", features = ["const_new", "union", "const_generics"] }

+smallvec = { version = "1.10.0", features = [

+ "const_new",

+ "union",

+ "const_generics",

+] }

smol_str = "0.2.0"

+nohash-hasher = "0.2.0"

+text-size = "1.1.0"

# the following crates are pinned to prevent us from pulling in syn 2 until all our dependencies have moved

serde = { version = "=1.0.156", features = ["derive"] }

serde_json = "1.0.94"

diff --git a/crates/hir-ty/Cargo.toml b/crates/hir-ty/Cargo.toml
index 51d69d103a..6ca0dbb850 100644
--- a/crates/hir-ty/Cargo.toml
+++ b/crates/hir-ty/Cargo.toml

@@ -29,6 +29,7 @@ chalk-derive = "0.89.0"

la-arena = { version = "0.3.0", path = "../../lib/la-arena" }

once_cell = "1.17.0"

triomphe.workspace = true

+nohash-hasher.workspace = true

typed-arena = "2.0.1"

rustc_index = { version = "0.0.20221221", package = "hkalbasi-rustc-ap-rustc_index", default-features = false }

diff --git a/crates/hir-ty/src/test_db.rs b/crates/hir-ty/src/test_db.rs
index 1276a4c5e1..7d19e0a191 100644
--- a/crates/hir-ty/src/test_db.rs
+++ b/crates/hir-ty/src/test_db.rs

@@ -8,8 +8,8 @@ use base_db::{

};

use hir_def::{db::DefDatabase, ModuleId};

use hir_expand::db::ExpandDatabase;

+use nohash_hasher::IntMap;

use rustc_hash::FxHashSet;

-use stdx::hash::NoHashHashMap;

use syntax::TextRange;

use test_utils::extract_annotations;

use triomphe::Arc;

@@ -102,7 +102,7 @@ impl TestDB {

self.module_for_file_opt(file_id).unwrap()

}

- pub(crate) fn extract_annotations(&self) -> NoHashHashMap<FileId, Vec<(TextRange, String)>> {

+ pub(crate) fn extract_annotations(&self) -> IntMap<FileId, Vec<(TextRange, String)>> {

let mut files = Vec::new();

let crate_graph = self.crate_graph();

for krate in crate_graph.iter() {

diff --git a/crates/ide-db/Cargo.toml b/crates/ide-db/Cargo.toml
index fccd6d2b6d..4e75dc4dba 100644
--- a/crates/ide-db/Cargo.toml
+++ b/crates/ide-db/Cargo.toml

@@ -24,6 +24,7 @@ arrayvec = "0.7.2"

indexmap = "1.9.1"

memchr = "2.5.0"

triomphe.workspace = true

+nohash-hasher.workspace = true

# local deps

base-db.workspace = true

@@ -37,6 +38,8 @@ text-edit.workspace = true

# something from some `hir-xxx` subpackage, reexport the API via `hir`.

hir.workspace = true

+line-index.workspace = true

[dev-dependencies]

expect-test = "1.4.0"

oorandom = "11.1.3"

diff --git a/crates/ide-db/src/lib.rs b/crates/ide-db/src/lib.rs
index 5263271fa6..ff1a20f03f 100644
--- a/crates/ide-db/src/lib.rs
+++ b/crates/ide-db/src/lib.rs

@@ -13,7 +13,6 @@ pub mod famous_defs;

pub mod helpers;

pub mod items_locator;

pub mod label;

-pub mod line_index;

pub mod path_transform;

pub mod rename;

pub mod rust_doc;

@@ -55,6 +54,8 @@ use triomphe::Arc;

use crate::{line_index::LineIndex, symbol_index::SymbolsDatabase};

pub use rustc_hash::{FxHashMap, FxHashSet, FxHasher};

+pub use ::line_index;

/// `base_db` is normally also needed in places where `ide_db` is used, so this re-export is for convenience.

pub use base_db;

@@ -414,4 +415,5 @@ impl SnippetCap {

#[cfg(test)]

mod tests {

mod sourcegen_lints;

+ mod line_index;

}

diff --git a/crates/ide-db/src/line_index.rs b/crates/ide-db/src/line_index.rs
deleted file mode 100644
index 9fb58ebe8a..0000000000
--- a/crates/ide-db/src/line_index.rs
+++ /dev/null

@@ -1,317 +0,0 @@

-//! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)`

-//! representation.

-use std::{iter, mem};

-use stdx::hash::NoHashHashMap;

-use syntax::{TextRange, TextSize};

-#[derive(Clone, Debug, PartialEq, Eq)]

-pub struct LineIndex {

- /// Offset the beginning of each line, zero-based.

- pub(crate) newlines: Vec<TextSize>,

- /// List of non-ASCII characters on each line.

- pub(crate) line_wide_chars: NoHashHashMap<u32, Vec<WideChar>>,

-/// Line/Column information in native, utf8 format.

-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]

-pub struct LineCol {

- /// Zero-based

- pub line: u32,

- /// Zero-based utf8 offset

- pub col: u32,

-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]

-pub enum WideEncoding {

- Utf16,

- Utf32,

-/// Line/Column information in legacy encodings.

-///

-/// Deliberately not a generic type and different from `LineCol`.

-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]

-pub struct WideLineCol {

- /// Zero-based

- pub line: u32,

- /// Zero-based

- pub col: u32,

-#[derive(Clone, Debug, Hash, PartialEq, Eq)]

-pub(crate) struct WideChar {

- /// Start offset of a character inside a line, zero-based

- pub(crate) start: TextSize,

- /// End offset of a character inside a line, zero-based

- pub(crate) end: TextSize,

-impl WideChar {

- /// Returns the length in 8-bit UTF-8 code units.

- fn len(&self) -> TextSize {

- self.end - self.start

- }

- /// Returns the length in UTF-16 or UTF-32 code units.

- fn wide_len(&self, enc: WideEncoding) -> usize {

- match enc {

- WideEncoding::Utf16 => {

- if self.len() == TextSize::from(4) {

- 2

- } else {

- 1

- }

- WideEncoding::Utf32 => 1,

- }

-impl LineIndex {

- pub fn new(text: &str) -> LineIndex {

- let mut line_wide_chars = NoHashHashMap::default();

- let mut wide_chars = Vec::new();

- let mut newlines = Vec::with_capacity(16);

- newlines.push(TextSize::from(0));

- let mut curr_row = 0.into();

- let mut curr_col = 0.into();

- let mut line = 0;

- for c in text.chars() {

- let c_len = TextSize::of(c);

- curr_row += c_len;

- if c == '\n' {

- newlines.push(curr_row);

- // Save any utf-16 characters seen in the previous line

- if !wide_chars.is_empty() {

- line_wide_chars.insert(line, mem::take(&mut wide_chars));

- }

- // Prepare for processing the next line

- curr_col = 0.into();

- line += 1;

- continue;

- }

- if !c.is_ascii() {

- wide_chars.push(WideChar { start: curr_col, end: curr_col + c_len });

- }

- curr_col += c_len;

- }

- // Save any utf-16 characters seen in the last line

- if !wide_chars.is_empty() {

- line_wide_chars.insert(line, wide_chars);

- }

- newlines.shrink_to_fit();

- line_wide_chars.shrink_to_fit();

- LineIndex { newlines, line_wide_chars }

- }

- pub fn line_col(&self, offset: TextSize) -> LineCol {

- let line = self.newlines.partition_point(|&it| it <= offset) - 1;

- let line_start_offset = self.newlines[line];

- let col = offset - line_start_offset;

- LineCol { line: line as u32, col: col.into() }

- }

- pub fn offset(&self, line_col: LineCol) -> Option<TextSize> {

- self.newlines

- .get(line_col.line as usize)

- .map(|offset| offset + TextSize::from(line_col.col))

- }

- pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> WideLineCol {

- let col = self.utf8_to_wide_col(enc, line_col.line, line_col.col.into());

- WideLineCol { line: line_col.line, col: col as u32 }

- }

- pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> LineCol {

- let col = self.wide_to_utf8_col(enc, line_col.line, line_col.col);

- LineCol { line: line_col.line, col: col.into() }

- }

- pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ {

- let lo = self.newlines.partition_point(|&it| it < range.start());

- let hi = self.newlines.partition_point(|&it| it <= range.end());

- let all = iter::once(range.start())

- .chain(self.newlines[lo..hi].iter().copied())

- .chain(iter::once(range.end()));

- all.clone()

- .zip(all.skip(1))

- .map(|(lo, hi)| TextRange::new(lo, hi))

- .filter(|it| !it.is_empty())

- }

- fn utf8_to_wide_col(&self, enc: WideEncoding, line: u32, col: TextSize) -> usize {

- let mut res: usize = col.into();

- if let Some(wide_chars) = self.line_wide_chars.get(&line) {

- for c in wide_chars {

- if c.end <= col {

- res -= usize::from(c.len()) - c.wide_len(enc);

- } else {

- // From here on, all utf16 characters come *after* the character we are mapping,

- // so we don't need to take them into account

- break;

- }

- res

- }

- fn wide_to_utf8_col(&self, enc: WideEncoding, line: u32, mut col: u32) -> TextSize {

- if let Some(wide_chars) = self.line_wide_chars.get(&line) {

- for c in wide_chars {

- if col > u32::from(c.start) {

- col += u32::from(c.len()) - c.wide_len(enc) as u32;

- } else {

- // From here on, all utf16 characters come *after* the character we are mapping,

- // so we don't need to take them into account

- break;

- }

- col.into()

- }

-#[cfg(test)]

-mod tests {

- use test_utils::skip_slow_tests;

- use super::WideEncoding::{Utf16, Utf32};

- use super::*;

- #[test]

- fn test_line_index() {

- let text = "hello\nworld";

- let table = [

- (00, 0, 0),

- (01, 0, 1),

- (05, 0, 5),

- (06, 1, 0),

- (07, 1, 1),

- (08, 1, 2),

- (10, 1, 4),

- (11, 1, 5),

- (12, 1, 6),

- ];

- let index = LineIndex::new(text);

- for (offset, line, col) in table {

- assert_eq!(index.line_col(offset.into()), LineCol { line, col });

- }

- let text = "\nhello\nworld";

- let table = [(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)];

- let index = LineIndex::new(text);

- for (offset, line, col) in table {

- assert_eq!(index.line_col(offset.into()), LineCol { line, col });

- }

- #[test]

- fn test_char_len() {

- assert_eq!('メ'.len_utf8(), 3);

- assert_eq!('メ'.len_utf16(), 1);

- }

- #[test]

- fn test_empty_index() {

- let col_index = LineIndex::new(

- "

-const C: char = 'x';

-",

- );

- assert_eq!(col_index.line_wide_chars.len(), 0);

- }

- #[test]

- fn test_every_chars() {

- if skip_slow_tests() {

- return;

- }

- let text: String = {

- let mut chars: Vec<char> = ((0 as char)..char::MAX).collect(); // Neat!

- chars.extend("\n".repeat(chars.len() / 16).chars());

- let mut rng = oorandom::Rand32::new(stdx::rand::seed());

- stdx::rand::shuffle(&mut chars, |i| rng.rand_range(0..i as u32) as usize);

- chars.into_iter().collect()

- };

- assert!(text.contains('💩')); // Sanity check.

- let line_index = LineIndex::new(&text);

- let mut lin_col = LineCol { line: 0, col: 0 };

- let mut col_utf16 = 0;

- let mut col_utf32 = 0;

- for (offset, c) in text.char_indices() {

- let got_offset = line_index.offset(lin_col).unwrap();

- assert_eq!(usize::from(got_offset), offset);

- let got_lin_col = line_index.line_col(got_offset);

- assert_eq!(got_lin_col, lin_col);

- for enc in [Utf16, Utf32] {

- let wide_lin_col = line_index.to_wide(enc, lin_col);

- let got_lin_col = line_index.to_utf8(enc, wide_lin_col);

- assert_eq!(got_lin_col, lin_col);

- let want_col = match enc {

- Utf16 => col_utf16,

- Utf32 => col_utf32,

- };

- assert_eq!(wide_lin_col.col, want_col)

- }

- if c == '\n' {

- lin_col.line += 1;

- lin_col.col = 0;

- col_utf16 = 0;

- col_utf32 = 0;

- } else {

- lin_col.col += c.len_utf8() as u32;

- col_utf16 += c.len_utf16() as u32;

- col_utf32 += 1;

- }

- #[test]

- fn test_splitlines() {

- fn r(lo: u32, hi: u32) -> TextRange {

- TextRange::new(lo.into(), hi.into())

- }

- let text = "a\nbb\nccc\n";

- let line_index = LineIndex::new(text);

- let actual = line_index.lines(r(0, 9)).collect::<Vec<_>>();

- let expected = vec![r(0, 2), r(2, 5), r(5, 9)];

- assert_eq!(actual, expected);

- let text = "";

- let line_index = LineIndex::new(text);

- let actual = line_index.lines(r(0, 0)).collect::<Vec<_>>();

- let expected = vec![];

- assert_eq!(actual, expected);

- let text = "\n";

- let line_index = LineIndex::new(text);

- let actual = line_index.lines(r(0, 1)).collect::<Vec<_>>();

- let expected = vec![r(0, 1)];

- assert_eq!(actual, expected)

- }

diff --git a/crates/ide-db/src/search.rs b/crates/ide-db/src/search.rs
index f58a96d595..9d00c71709 100644
--- a/crates/ide-db/src/search.rs
+++ b/crates/ide-db/src/search.rs

@@ -11,9 +11,9 @@ use hir::{

AsAssocItem, DefWithBody, HasAttrs, HasSource, InFile, ModuleSource, Semantics, Visibility,

};

use memchr::memmem::Finder;

+use nohash_hasher::IntMap;

use once_cell::unsync::Lazy;

use parser::SyntaxKind;

-use stdx::hash::NoHashHashMap;

use syntax::{ast, match_ast, AstNode, TextRange, TextSize};

use triomphe::Arc;

@@ -25,7 +25,7 @@ use crate::{

#[derive(Debug, Default, Clone)]

pub struct UsageSearchResult {

- pub references: NoHashHashMap<FileId, Vec<FileReference>>,

+ pub references: IntMap<FileId, Vec<FileReference>>,

}

impl UsageSearchResult {

@@ -50,7 +50,7 @@ impl UsageSearchResult {

impl IntoIterator for UsageSearchResult {

type Item = (FileId, Vec<FileReference>);

- type IntoIter = <NoHashHashMap<FileId, Vec<FileReference>> as IntoIterator>::IntoIter;

+ type IntoIter = <IntMap<FileId, Vec<FileReference>> as IntoIterator>::IntoIter;

fn into_iter(self) -> Self::IntoIter {

self.references.into_iter()

@@ -84,17 +84,17 @@ pub enum ReferenceCategory {

/// e.g. for things like local variables.

#[derive(Clone, Debug)]

pub struct SearchScope {

- entries: NoHashHashMap<FileId, Option<TextRange>>,

+ entries: IntMap<FileId, Option<TextRange>>,

}

impl SearchScope {

- fn new(entries: NoHashHashMap<FileId, Option<TextRange>>) -> SearchScope {

+ fn new(entries: IntMap<FileId, Option<TextRange>>) -> SearchScope {

SearchScope { entries }

}

/// Build a search scope spanning the entire crate graph of files.

fn crate_graph(db: &RootDatabase) -> SearchScope {

- let mut entries = NoHashHashMap::default();

+ let mut entries = IntMap::default();

let graph = db.crate_graph();

for krate in graph.iter() {

@@ -108,7 +108,7 @@ impl SearchScope {

/// Build a search scope spanning all the reverse dependencies of the given crate.

fn reverse_dependencies(db: &RootDatabase, of: hir::Crate) -> SearchScope {

- let mut entries = NoHashHashMap::default();

+ let mut entries = IntMap::default();

for rev_dep in of.transitive_reverse_dependencies(db) {

let root_file = rev_dep.root_file(db);

let source_root_id = db.file_source_root(root_file);

@@ -128,7 +128,7 @@ impl SearchScope {

/// Build a search scope spanning the given module and all its submodules.

fn module_and_children(db: &RootDatabase, module: hir::Module) -> SearchScope {

- let mut entries = NoHashHashMap::default();

+ let mut entries = IntMap::default();

let (file_id, range) = {

let InFile { file_id, value } = module.definition_source(db);

@@ -161,7 +161,7 @@ impl SearchScope {

/// Build an empty search scope.

pub fn empty() -> SearchScope {

- SearchScope::new(NoHashHashMap::default())

+ SearchScope::new(IntMap::default())

}

/// Build a empty search scope spanning the given file.

diff --git a/crates/ide-db/src/source_change.rs b/crates/ide-db/src/source_change.rs
index 5a3e352b2e..061fb0f05c 100644
--- a/crates/ide-db/src/source_change.rs
+++ b/crates/ide-db/src/source_change.rs

@@ -5,16 +5,16 @@

use std::{collections::hash_map::Entry, iter, mem};

+use crate::SnippetCap;

use base_db::{AnchoredPathBuf, FileId};

-use stdx::{hash::NoHashHashMap, never};

+use nohash_hasher::IntMap;

+use stdx::never;

use syntax::{algo, ast, ted, AstNode, SyntaxNode, SyntaxNodePtr, TextRange, TextSize};

use text_edit::{TextEdit, TextEditBuilder};

-use crate::SnippetCap;

#[derive(Default, Debug, Clone)]

pub struct SourceChange {

- pub source_file_edits: NoHashHashMap<FileId, TextEdit>,

+ pub source_file_edits: IntMap<FileId, TextEdit>,

pub file_system_edits: Vec<FileSystemEdit>,

pub is_snippet: bool,

}

@@ -23,7 +23,7 @@ impl SourceChange {

/// Creates a new SourceChange with the given label

/// from the edits.

pub fn from_edits(

- source_file_edits: NoHashHashMap<FileId, TextEdit>,

+ source_file_edits: IntMap<FileId, TextEdit>,

file_system_edits: Vec<FileSystemEdit>,

) -> Self {

SourceChange { source_file_edits, file_system_edits, is_snippet: false }

@@ -77,8 +77,8 @@ impl Extend<FileSystemEdit> for SourceChange {

}

-impl From<NoHashHashMap<FileId, TextEdit>> for SourceChange {

- fn from(source_file_edits: NoHashHashMap<FileId, TextEdit>) -> SourceChange {

+impl From<IntMap<FileId, TextEdit>> for SourceChange {

+ fn from(source_file_edits: IntMap<FileId, TextEdit>) -> SourceChange {

SourceChange { source_file_edits, file_system_edits: Vec::new(), is_snippet: false }

}

diff --git a/crates/ide-db/src/tests/line_index.rs b/crates/ide-db/src/tests/line_index.rs
new file mode 100644
index 0000000000..6b49bb2631
--- /dev/null
+++ b/crates/ide-db/src/tests/line_index.rs

@@ -0,0 +1,49 @@

+use line_index::{LineCol, LineIndex, WideEncoding};

+use test_utils::skip_slow_tests;

+#[test]

+fn test_every_chars() {

+ if skip_slow_tests() {

+ return;

+ }

+ let text: String = {

+ let mut chars: Vec<char> = ((0 as char)..char::MAX).collect(); // Neat!

+ chars.extend("\n".repeat(chars.len() / 16).chars());

+ let mut rng = oorandom::Rand32::new(stdx::rand::seed());

+ stdx::rand::shuffle(&mut chars, |i| rng.rand_range(0..i as u32) as usize);

+ chars.into_iter().collect()

+ };

+ assert!(text.contains('💩')); // Sanity check.

+ let line_index = LineIndex::new(&text);

+ let mut lin_col = LineCol { line: 0, col: 0 };

+ let mut col_utf16 = 0;

+ let mut col_utf32 = 0;

+ for (offset, c) in text.char_indices() {

+ let got_offset = line_index.offset(lin_col).unwrap();

+ assert_eq!(usize::from(got_offset), offset);

+ let got_lin_col = line_index.line_col(got_offset);

+ assert_eq!(got_lin_col, lin_col);

+ for (enc, col) in [(WideEncoding::Utf16, col_utf16), (WideEncoding::Utf32, col_utf32)] {

+ let wide_lin_col = line_index.to_wide(enc, lin_col).unwrap();

+ let got_lin_col = line_index.to_utf8(enc, wide_lin_col).unwrap();

+ assert_eq!(got_lin_col, lin_col);

+ assert_eq!(wide_lin_col.col, col)

+ }

+ if c == '\n' {

+ lin_col.line += 1;

+ lin_col.col = 0;

+ col_utf16 = 0;

+ col_utf32 = 0;

+ } else {

+ lin_col.col += c.len_utf8() as u32;

+ col_utf16 += c.len_utf16() as u32;

+ col_utf32 += 1;

+ }

diff --git a/crates/ide-ssr/Cargo.toml b/crates/ide-ssr/Cargo.toml
index b8625fc1be..70ed6dea5b 100644
--- a/crates/ide-ssr/Cargo.toml
+++ b/crates/ide-ssr/Cargo.toml

@@ -16,6 +16,7 @@ doctest = false

cov-mark = "2.0.0-pre.1"

itertools = "0.10.5"

triomphe.workspace = true

+nohash-hasher.workspace = true

# local deps

hir.workspace = true

diff --git a/crates/ide-ssr/src/lib.rs b/crates/ide-ssr/src/lib.rs
index a1945087d7..f51a9547a5 100644
--- a/crates/ide-ssr/src/lib.rs
+++ b/crates/ide-ssr/src/lib.rs

@@ -87,8 +87,8 @@ pub use crate::{errors::SsrError, from_comment::ssr_from_comment, matching::Matc

use crate::{errors::bail, matching::MatchFailureReason};

use hir::Semantics;

use ide_db::base_db::{FileId, FilePosition, FileRange};

+use nohash_hasher::IntMap;

use resolving::ResolvedRule;

-use stdx::hash::NoHashHashMap;

use syntax::{ast, AstNode, SyntaxNode, TextRange};

use text_edit::TextEdit;

@@ -168,9 +168,9 @@ impl<'db> MatchFinder<'db> {

}

/// Finds matches for all added rules and returns edits for all found matches.

- pub fn edits(&self) -> NoHashHashMap<FileId, TextEdit> {

+ pub fn edits(&self) -> IntMap<FileId, TextEdit> {

use ide_db::base_db::SourceDatabaseExt;

- let mut matches_by_file = NoHashHashMap::default();

+ let mut matches_by_file = IntMap::default();

for m in self.matches().matches {

matches_by_file

.entry(m.range.file_id)

diff --git a/crates/ide/Cargo.toml b/crates/ide/Cargo.toml
index c3dcb60490..2aee203c4e 100644
--- a/crates/ide/Cargo.toml
+++ b/crates/ide/Cargo.toml

@@ -24,6 +24,7 @@ url = "2.3.1"

dot = "0.1.4"

smallvec.workspace = true

triomphe.workspace = true

+nohash-hasher.workspace = true

# local deps

cfg.workspace = true

diff --git a/crates/ide/src/references.rs b/crates/ide/src/references.rs
index b8e05d4f62..291b1a349b 100644
--- a/crates/ide/src/references.rs
+++ b/crates/ide/src/references.rs

@@ -17,7 +17,7 @@ use ide_db::{

RootDatabase,

};

use itertools::Itertools;

-use stdx::hash::NoHashHashMap;

+use nohash_hasher::IntMap;

use syntax::{

algo::find_node_at_offset,

ast::{self, HasName},

@@ -31,7 +31,7 @@ use crate::{FilePosition, NavigationTarget, TryToNav};

#[derive(Debug, Clone)]

pub struct ReferenceSearchResult {

pub declaration: Option<Declaration>,

- pub references: NoHashHashMap<FileId, Vec<(TextRange, Option<ReferenceCategory>)>>,

+ pub references: IntMap<FileId, Vec<(TextRange, Option<ReferenceCategory>)>>,

}

#[derive(Debug, Clone)]

diff --git a/crates/rust-analyzer/Cargo.toml b/crates/rust-analyzer/Cargo.toml
index c7c2e34127..ae5b8e4c42 100644
--- a/crates/rust-analyzer/Cargo.toml
+++ b/crates/rust-analyzer/Cargo.toml

@@ -46,6 +46,7 @@ tracing-subscriber = { version = "0.3.16", default-features = false, features =

tracing-log = "0.1.3"

tracing-tree = "0.2.1"

triomphe.workspace = true

+nohash-hasher.workspace = true

always-assert = "0.1.2"

# These dependencies are unused, but we pin them to a version here to restrict them for our transitive dependencies

@@ -95,7 +96,4 @@ mbe.workspace = true

[features]

jemalloc = ["jemallocator", "profile/jemalloc"]

force-always-assert = ["always-assert/force"]

-in-rust-tree = [

- "ide/in-rust-tree",

- "syntax/in-rust-tree",

+in-rust-tree = ["ide/in-rust-tree", "syntax/in-rust-tree"]

diff --git a/crates/rust-analyzer/src/caps.rs b/crates/rust-analyzer/src/caps.rs
index 3628670ac9..ab06b96814 100644
--- a/crates/rust-analyzer/src/caps.rs
+++ b/crates/rust-analyzer/src/caps.rs

@@ -23,13 +23,14 @@ use crate::semantic_tokens;

pub fn server_capabilities(config: &Config) -> ServerCapabilities {

ServerCapabilities {

- position_encoding: Some(match negotiated_encoding(config.caps()) {

- PositionEncoding::Utf8 => PositionEncodingKind::UTF8,

+ position_encoding: match negotiated_encoding(config.caps()) {

+ PositionEncoding::Utf8 => Some(PositionEncodingKind::UTF8),

PositionEncoding::Wide(wide) => match wide {

- WideEncoding::Utf16 => PositionEncodingKind::UTF16,

- WideEncoding::Utf32 => PositionEncodingKind::UTF32,

+ WideEncoding::Utf16 => Some(PositionEncodingKind::UTF16),

+ WideEncoding::Utf32 => Some(PositionEncodingKind::UTF32),

+ _ => None,

- }),

+ },

text_document_sync: Some(TextDocumentSyncCapability::Options(TextDocumentSyncOptions {

open_close: Some(true),

change: Some(TextDocumentSyncKind::INCREMENTAL),

diff --git a/crates/rust-analyzer/src/diagnostics.rs b/crates/rust-analyzer/src/diagnostics.rs
index 2edb394a0b..33422fd058 100644
--- a/crates/rust-analyzer/src/diagnostics.rs
+++ b/crates/rust-analyzer/src/diagnostics.rs

@@ -5,12 +5,12 @@ use std::mem;

use ide::FileId;

use ide_db::FxHashMap;

-use stdx::hash::{NoHashHashMap, NoHashHashSet};

+use nohash_hasher::{IntMap, IntSet};

use triomphe::Arc;

use crate::lsp_ext;

-pub(crate) type CheckFixes = Arc<NoHashHashMap<usize, NoHashHashMap<FileId, Vec<Fix>>>>;

+pub(crate) type CheckFixes = Arc<IntMap<usize, IntMap<FileId, Vec<Fix>>>>;

#[derive(Debug, Default, Clone)]

pub struct DiagnosticsMapConfig {

@@ -21,12 +21,12 @@ pub struct DiagnosticsMapConfig {

#[derive(Debug, Default, Clone)]

pub(crate) struct DiagnosticCollection {

- // FIXME: should be NoHashHashMap<FileId, Vec<ra_id::Diagnostic>>

- pub(crate) native: NoHashHashMap<FileId, Vec<lsp_types::Diagnostic>>,

+ // FIXME: should be IntMap<FileId, Vec<ra_id::Diagnostic>>

+ pub(crate) native: IntMap<FileId, Vec<lsp_types::Diagnostic>>,

// FIXME: should be Vec<flycheck::Diagnostic>

- pub(crate) check: NoHashHashMap<usize, NoHashHashMap<FileId, Vec<lsp_types::Diagnostic>>>,

+ pub(crate) check: IntMap<usize, IntMap<FileId, Vec<lsp_types::Diagnostic>>>,

pub(crate) check_fixes: CheckFixes,

- changes: NoHashHashSet<FileId>,

+ changes: IntSet<FileId>,

}

#[derive(Debug, Clone)]

@@ -106,7 +106,7 @@ impl DiagnosticCollection {

native.chain(check)

}

- pub(crate) fn take_changes(&mut self) -> Option<NoHashHashSet<FileId>> {

+ pub(crate) fn take_changes(&mut self) -> Option<IntSet<FileId>> {

if self.changes.is_empty() {

return None;

}

diff --git a/crates/rust-analyzer/src/diagnostics/to_proto.rs b/crates/rust-analyzer/src/diagnostics/to_proto.rs
index 415fa4e02f..e1d1130ff1 100644
--- a/crates/rust-analyzer/src/diagnostics/to_proto.rs
+++ b/crates/rust-analyzer/src/diagnostics/to_proto.rs

@@ -3,7 +3,6 @@

use std::collections::HashMap;

use flycheck::{Applicability, DiagnosticLevel, DiagnosticSpan};

-use ide_db::line_index::WideEncoding;

use itertools::Itertools;

use stdx::format_to;

use vfs::{AbsPath, AbsPathBuf};

@@ -80,37 +79,33 @@ fn position(

position_encoding: &PositionEncoding,

span: &DiagnosticSpan,

line_offset: usize,

- column_offset: usize,

+ column_offset_utf32: usize,

) -> lsp_types::Position {

let line_index = line_offset - span.line_start;

- let mut true_column_offset = column_offset;

- if let Some(line) = span.text.get(line_index) {

- if line.text.chars().count() == line.text.len() {

- // all one byte utf-8 char

- return lsp_types::Position {

- line: (line_offset as u32).saturating_sub(1),

- character: (column_offset as u32).saturating_sub(1),

- };

- }

- let mut char_offset = 0;

- let len_func = match position_encoding {

- PositionEncoding::Utf8 => char::len_utf8,

- PositionEncoding::Wide(WideEncoding::Utf16) => char::len_utf16,

- PositionEncoding::Wide(WideEncoding::Utf32) => |_| 1,

- };

- for c in line.text.chars() {

- char_offset += 1;

- if char_offset > column_offset {

- break;

+ let column_offset_encoded = match span.text.get(line_index) {

+ // Fast path.

+ Some(line) if line.text.is_ascii() => column_offset_utf32,

+ Some(line) => {

+ let line_prefix_len = line

+ .text

+ .char_indices()

+ .take(column_offset_utf32)

+ .last()

+ .map(|(pos, c)| pos + c.len_utf8())

+ .unwrap_or(0);

+ let line_prefix = &line.text[..line_prefix_len];

+ match position_encoding {

+ PositionEncoding::Utf8 => line_prefix.len(),

+ PositionEncoding::Wide(enc) => enc.measure(line_prefix),

}

- true_column_offset += len_func(c) - 1;

}

- }

+ None => column_offset_utf32,

+ };

lsp_types::Position {

line: (line_offset as u32).saturating_sub(1),

- character: (true_column_offset as u32).saturating_sub(1),

+ character: (column_offset_encoded as u32).saturating_sub(1),

}

diff --git a/crates/rust-analyzer/src/from_proto.rs b/crates/rust-analyzer/src/from_proto.rs
index 44891fad1a..cd74a5500d 100644
--- a/crates/rust-analyzer/src/from_proto.rs
+++ b/crates/rust-analyzer/src/from_proto.rs

@@ -31,7 +31,10 @@ pub(crate) fn offset(line_index: &LineIndex, position: lsp_types::Position) -> R

PositionEncoding::Utf8 => LineCol { line: position.line, col: position.character },

PositionEncoding::Wide(enc) => {

let line_col = WideLineCol { line: position.line, col: position.character };

- line_index.index.to_utf8(enc, line_col)

+ line_index

+ .index

+ .to_utf8(enc, line_col)

+ .ok_or_else(|| format_err!("Invalid wide col offset"))?

}

};

let text_size =

diff --git a/crates/rust-analyzer/src/global_state.rs b/crates/rust-analyzer/src/global_state.rs
index 0f77eeae4a..9535d88454 100644
--- a/crates/rust-analyzer/src/global_state.rs
+++ b/crates/rust-analyzer/src/global_state.rs

@@ -10,11 +10,11 @@ use flycheck::FlycheckHandle;

use ide::{Analysis, AnalysisHost, Cancellable, Change, FileId};

use ide_db::base_db::{CrateId, FileLoader, ProcMacroPaths, SourceDatabase};

use lsp_types::{SemanticTokens, Url};

+use nohash_hasher::IntMap;

use parking_lot::{Mutex, RwLock};

use proc_macro_api::ProcMacroServer;

use project_model::{CargoWorkspace, ProjectWorkspace, Target, WorkspaceBuildScripts};

use rustc_hash::FxHashMap;

-use stdx::hash::NoHashHashMap;

use triomphe::Arc;

use vfs::AnchoredPathBuf;

@@ -70,7 +70,7 @@ pub(crate) struct GlobalState {

pub(crate) flycheck_sender: Sender<flycheck::Message>,

pub(crate) flycheck_receiver: Receiver<flycheck::Message>,

- pub(crate) vfs: Arc<RwLock<(vfs::Vfs, NoHashHashMap<FileId, LineEndings>)>>,

+ pub(crate) vfs: Arc<RwLock<(vfs::Vfs, IntMap<FileId, LineEndings>)>>,

pub(crate) vfs_config_version: u32,

pub(crate) vfs_progress_config_version: u32,

pub(crate) vfs_progress_n_total: usize,

@@ -117,7 +117,7 @@ pub(crate) struct GlobalStateSnapshot {

pub(crate) check_fixes: CheckFixes,

mem_docs: MemDocs,

pub(crate) semantic_tokens_cache: Arc<Mutex<FxHashMap<Url, SemanticTokens>>>,

- vfs: Arc<RwLock<(vfs::Vfs, NoHashHashMap<FileId, LineEndings>)>>,

+ vfs: Arc<RwLock<(vfs::Vfs, IntMap<FileId, LineEndings>)>>,

pub(crate) workspaces: Arc<Vec<ProjectWorkspace>>,

// used to signal semantic highlighting to fall back to syntax based highlighting until proc-macros have been loaded

pub(crate) proc_macros_loaded: bool,

@@ -170,7 +170,7 @@ impl GlobalState {

flycheck_sender,

flycheck_receiver,

- vfs: Arc::new(RwLock::new((vfs::Vfs::default(), NoHashHashMap::default()))),

+ vfs: Arc::new(RwLock::new((vfs::Vfs::default(), IntMap::default()))),

vfs_config_version: 0,

vfs_progress_config_version: 0,

vfs_progress_n_total: 0,

diff --git a/crates/rust-analyzer/src/to_proto.rs b/crates/rust-analyzer/src/to_proto.rs
index 60292d2ad1..b9d853e202 100644
--- a/crates/rust-analyzer/src/to_proto.rs
+++ b/crates/rust-analyzer/src/to_proto.rs

@@ -32,7 +32,7 @@ pub(crate) fn position(line_index: &LineIndex, offset: TextSize) -> lsp_types::P

match line_index.encoding {

PositionEncoding::Utf8 => lsp_types::Position::new(line_col.line, line_col.col),

PositionEncoding::Wide(enc) => {

- let line_col = line_index.index.to_wide(enc, line_col);

+ let line_col = line_index.index.to_wide(enc, line_col).unwrap();

lsp_types::Position::new(line_col.line, line_col.col)

}

diff --git a/crates/stdx/src/hash.rs b/crates/stdx/src/hash.rs
deleted file mode 100644
index 0c21d2674b..0000000000
--- a/crates/stdx/src/hash.rs
+++ /dev/null

@@ -1,80 +0,0 @@

-//! A none hashing [`Hasher`] implementation.

-use std::{

- hash::{BuildHasher, Hasher},

- marker::PhantomData,

-};

-pub type NoHashHashMap<K, V> = std::collections::HashMap<K, V, NoHashHasherBuilder<K>>;

-pub type NoHashHashSet<K> = std::collections::HashSet<K, NoHashHasherBuilder<K>>;

-#[derive(Copy, Clone, Debug, PartialEq, Eq)]

-pub struct NoHashHasherBuilder<T>(PhantomData<T>);

-impl<T> Default for NoHashHasherBuilder<T> {

- fn default() -> Self {

- Self(Default::default())

- }

-pub trait NoHashHashable {}

-impl NoHashHashable for usize {}

-impl NoHashHashable for u32 {}

-pub struct NoHashHasher(u64);

-impl<T: NoHashHashable> BuildHasher for NoHashHasherBuilder<T> {

- type Hasher = NoHashHasher;

- fn build_hasher(&self) -> Self::Hasher {

- NoHashHasher(0)

- }

-impl Hasher for NoHashHasher {

- fn finish(&self) -> u64 {

- self.0

- }

- fn write(&mut self, _: &[u8]) {

- unimplemented!("NoHashHasher should only be used for hashing primitive integers")

- }

- fn write_u8(&mut self, i: u8) {

- self.0 = i as u64;

- }

- fn write_u16(&mut self, i: u16) {

- self.0 = i as u64;

- }

- fn write_u32(&mut self, i: u32) {

- self.0 = i as u64;

- }

- fn write_u64(&mut self, i: u64) {

- self.0 = i;

- }

- fn write_usize(&mut self, i: usize) {

- self.0 = i as u64;

- }

- fn write_i8(&mut self, i: i8) {

- self.0 = i as u64;

- }

- fn write_i16(&mut self, i: i16) {

- self.0 = i as u64;

- }

- fn write_i32(&mut self, i: i32) {

- self.0 = i as u64;

- }

- fn write_i64(&mut self, i: i64) {

- self.0 = i as u64;

- }

- fn write_isize(&mut self, i: isize) {

- self.0 = i as u64;

- }

diff --git a/crates/stdx/src/lib.rs b/crates/stdx/src/lib.rs
index 5639aaf57c..8df86e8100 100644
--- a/crates/stdx/src/lib.rs
+++ b/crates/stdx/src/lib.rs

@@ -7,7 +7,6 @@ use std::process::Command;

use std::{cmp::Ordering, ops, time::Instant};

mod macros;

-pub mod hash;

pub mod process;

pub mod panic_context;

pub mod non_empty_vec;

diff --git a/crates/test-utils/Cargo.toml b/crates/test-utils/Cargo.toml
index 92b1ef23e6..2b5b6f4956 100644
--- a/crates/test-utils/Cargo.toml
+++ b/crates/test-utils/Cargo.toml

@@ -14,7 +14,7 @@ doctest = false

[dependencies]

# Avoid adding deps here, this crate is widely used in tests it should compile fast!

dissimilar = "1.0.4"

-text-size = "1.1.0"

+text-size.workspace = true

rustc-hash = "1.1.0"

stdx.workspace = true

diff --git a/crates/text-edit/Cargo.toml b/crates/text-edit/Cargo.toml
index 337cd23473..76d0ca5ccb 100644
--- a/crates/text-edit/Cargo.toml
+++ b/crates/text-edit/Cargo.toml

@@ -13,4 +13,4 @@ doctest = false

[dependencies]

itertools = "0.10.5"

-text-size = "1.1.0"

+text-size.workspace = true

diff --git a/crates/vfs/Cargo.toml b/crates/vfs/Cargo.toml
index 802a300060..3ae3dc83ca 100644
--- a/crates/vfs/Cargo.toml
+++ b/crates/vfs/Cargo.toml

@@ -15,6 +15,7 @@ doctest = false

rustc-hash = "1.1.0"

fst = "0.4.7"

indexmap = "1.9.1"

+nohash-hasher.workspace = true

paths.workspace = true

stdx.workspace = true

diff --git a/crates/vfs/src/file_set.rs b/crates/vfs/src/file_set.rs
index 700aebe0b3..0392ef3ceb 100644
--- a/crates/vfs/src/file_set.rs
+++ b/crates/vfs/src/file_set.rs

@@ -5,8 +5,8 @@

use std::fmt;

use fst::{IntoStreamer, Streamer};

+use nohash_hasher::IntMap;

use rustc_hash::FxHashMap;

-use stdx::hash::NoHashHashMap;

use crate::{AnchoredPath, FileId, Vfs, VfsPath};

@@ -14,7 +14,7 @@ use crate::{AnchoredPath, FileId, Vfs, VfsPath};

#[derive(Default, Clone, Eq, PartialEq)]

pub struct FileSet {

files: FxHashMap<VfsPath, FileId>,

- paths: NoHashHashMap<FileId, VfsPath>,

+ paths: IntMap<FileId, VfsPath>,

}

impl FileSet {

diff --git a/crates/vfs/src/lib.rs b/crates/vfs/src/lib.rs
index b510b9e394..ff8a2b9673 100644
--- a/crates/vfs/src/lib.rs
+++ b/crates/vfs/src/lib.rs

@@ -62,7 +62,8 @@ pub use paths::{AbsPath, AbsPathBuf};

#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]

pub struct FileId(pub u32);

-impl stdx::hash::NoHashHashable for FileId {}

+/// Safe because `FileId` is a newtype of `u32`, so its derived `Hash` feeds exactly one integer to the hasher.

+impl nohash_hasher::IsEnabled for FileId {}

/// Storage for all files read by rust-analyzer.

///

diff --git a/lib/line-index/Cargo.toml b/lib/line-index/Cargo.toml
new file mode 100644
index 0000000000..019ad3a53b
--- /dev/null
+++ b/lib/line-index/Cargo.toml

@@ -0,0 +1,11 @@

+[package]

+name = "line-index"

+version = "0.1.0-pre.1"

+description = "Maps flat `TextSize` offsets to/from `(line, column)` representation."

+license = "MIT OR Apache-2.0"

+repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/line-index"

+edition = "2021"

+[dependencies]

+text-size.workspace = true

+nohash-hasher.workspace = true

diff --git a/lib/line-index/src/lib.rs b/lib/line-index/src/lib.rs
new file mode 100644
index 0000000000..ad67d3f246
--- /dev/null
+++ b/lib/line-index/src/lib.rs

@@ -0,0 +1,237 @@

+//! See [`LineIndex`].

+#![deny(missing_debug_implementations, missing_docs, rust_2018_idioms)]

+#[cfg(test)]

+mod tests;

+use nohash_hasher::IntMap;

+pub use text_size::{TextRange, TextSize};

+/// `(line, column)` information in the native, UTF-8 encoding.

+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]

+pub struct LineCol {

+ /// Zero-based.

+ pub line: u32,

+ /// Zero-based UTF-8 offset.

+ pub col: u32,

+/// A kind of wide character encoding.

+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]

+#[non_exhaustive]

+pub enum WideEncoding {

+ /// UTF-16.

+ Utf16,

+ /// UTF-32.

+ Utf32,

+impl WideEncoding {

+ /// Returns the number of code units it takes to encode `text` in this encoding.

+ pub fn measure(&self, text: &str) -> usize {

+ match self {

+ WideEncoding::Utf16 => text.encode_utf16().count(),

+ WideEncoding::Utf32 => text.chars().count(),

+ }

+/// `(line, column)` information in wide encodings.

+///

+/// See [`WideEncoding`] for the kinds of wide encodings available.

+//

+// Deliberately not a generic type and different from `LineCol`.

+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]

+pub struct WideLineCol {

+ /// Zero-based.

+ pub line: u32,

+ /// Zero-based.

+ pub col: u32,

+#[derive(Debug, Clone, Copy, PartialEq, Eq)]

+struct WideChar {

+ /// Start offset of a character inside a line, zero-based.

+ start: TextSize,

+ /// End offset of a character inside a line, zero-based.

+ end: TextSize,

+impl WideChar {

+ /// Returns the length in 8-bit UTF-8 code units.

+ fn len(&self) -> TextSize {

+ self.end - self.start

+ }

+ /// Returns the length in UTF-16 or UTF-32 code units.

+ fn wide_len(&self, enc: WideEncoding) -> u32 {

+ match enc {

+ WideEncoding::Utf16 => {

+ if self.len() == TextSize::from(4) {

+ 2

+ } else {

+ 1

+ }

+ WideEncoding::Utf32 => 1,

+ }

+/// Maps flat [`TextSize`] offsets to/from `(line, column)` representation.

+#[derive(Debug, Clone, PartialEq, Eq)]

+pub struct LineIndex {

+ /// Offset of the beginning of each line (except the first, which always has offset 0).

+ newlines: Box<[TextSize]>,

+ /// Non-ASCII characters on each line, keyed by zero-based line number; lines without any have no entry.

+ line_wide_chars: IntMap<u32, Box<[WideChar]>>,

+ /// The length of the entire text.

+ len: TextSize,

+impl LineIndex {

+ /// Returns a `LineIndex` for the `text`.

+ pub fn new(text: &str) -> LineIndex {

+ let mut newlines = Vec::<TextSize>::with_capacity(16);

+ let mut line_wide_chars = IntMap::<u32, Box<[WideChar]>>::default();

+ let mut wide_chars = Vec::<WideChar>::new();

+ let mut cur_row = TextSize::from(0);

+ let mut cur_col = TextSize::from(0);

+ let mut line = 0u32;

+ for c in text.chars() {

+ let c_len = TextSize::of(c);

+ cur_row += c_len;

+ if c == '\n' {

+ newlines.push(cur_row);

+ // Save any wide characters seen in the previous line

+ if !wide_chars.is_empty() {

+ let cs = std::mem::take(&mut wide_chars).into_boxed_slice();

+ line_wide_chars.insert(line, cs);

+ }

+ // Prepare for processing the next line

+ cur_col = TextSize::from(0);

+ line += 1;

+ continue;

+ }

+ if !c.is_ascii() {

+ wide_chars.push(WideChar { start: cur_col, end: cur_col + c_len });

+ }

+ cur_col += c_len;

+ }

+ // Save any wide characters seen in the last line

+ if !wide_chars.is_empty() {

+ line_wide_chars.insert(line, wide_chars.into_boxed_slice());

+ }

+ LineIndex {

+ newlines: newlines.into_boxed_slice(),

+ line_wide_chars,

+ len: TextSize::of(text),

+ }

+ /// Transforms the `TextSize` into a `LineCol`.

+ ///

+ /// # Panics

+ ///

+ /// If the offset is invalid. See [`Self::try_line_col`].

+ pub fn line_col(&self, offset: TextSize) -> LineCol {

+ self.try_line_col(offset).expect("invalid offset")

+ }

+ /// Transforms the `TextSize` into a `LineCol`.

+ ///

+ /// Returns `None` if the `offset` was invalid, e.g. if it extends past the end of the text or

+ /// points to the middle of a multi-byte character.

+ pub fn try_line_col(&self, offset: TextSize) -> Option<LineCol> {

+ if offset > self.len {

+ return None;

+ }

+ let line = self.newlines.partition_point(|&it| it <= offset);

+ let start = self.start_offset(line)?;

+ let col = offset - start;

+ let ret = LineCol { line: line as u32, col: col.into() };

+ self.line_wide_chars

+ .get(&ret.line)

+ .into_iter()

+ .flat_map(|it| it.iter())

+ .all(|it| col <= it.start || it.end <= col)

+ .then_some(ret)

+ }

+ /// Transforms the `LineCol` into a `TextSize`.

+ pub fn offset(&self, line_col: LineCol) -> Option<TextSize> {

+ self.start_offset(line_col.line as usize).map(|start| start + TextSize::from(line_col.col))

+ }

+ fn start_offset(&self, line: usize) -> Option<TextSize> {

+ match line.checked_sub(1) {

+ None => Some(TextSize::from(0)),

+ Some(it) => self.newlines.get(it).copied(),

+ }

+ /// Transforms the UTF-8 `LineCol` into a `WideLineCol` in the given `WideEncoding`; `None` if the column would underflow.

+ pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> Option<WideLineCol> {

+ let mut col = line_col.col;

+ if let Some(wide_chars) = self.line_wide_chars.get(&line_col.line) {

+ for c in wide_chars.iter() {

+ if u32::from(c.end) <= line_col.col {

+ col = col.checked_sub(u32::from(c.len()) - c.wide_len(enc))?;

+ } else {

+ // From here on, all wide characters come *after* the character we are mapping,

+ // so we don't need to take them into account

+ break;

+ }

+ Some(WideLineCol { line: line_col.line, col })

+ }

+ /// Transforms the `WideLineCol` in the given `WideEncoding` into a UTF-8 `LineCol`; `None` if the column would overflow `u32`.

+ pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> Option<LineCol> {

+ let mut col = line_col.col;

+ if let Some(wide_chars) = self.line_wide_chars.get(&line_col.line) {

+ for c in wide_chars.iter() {

+ if col > u32::from(c.start) {

+ col = col.checked_add(u32::from(c.len()) - c.wide_len(enc))?;

+ } else {

+ // From here on, all wide characters come *after* the character we are mapping,

+ // so we don't need to take them into account

+ break;

+ }

+ Some(LineCol { line: line_col.line, col })

+ }

+ /// Given a range [start, end), returns a sorted iterator of non-empty ranges [start, x1), [x1,

+ /// x2), ..., [xn, end) where all the xi, which are positions of newlines, are inside the range

+ /// [start, end).

+ pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ {

+ let lo = self.newlines.partition_point(|&it| it < range.start());

+ let hi = self.newlines.partition_point(|&it| it <= range.end());

+ let all = std::iter::once(range.start())

+ .chain(self.newlines[lo..hi].iter().copied())

+ .chain(std::iter::once(range.end()));

+ all.clone()

+ .zip(all.skip(1))

+ .map(|(lo, hi)| TextRange::new(lo, hi))

+ .filter(|it| !it.is_empty())

+ }

+ /// Returns the length of the original text.

+ pub fn len(&self) -> TextSize {

+ self.len

+ }

diff --git a/lib/line-index/src/tests.rs b/lib/line-index/src/tests.rs
new file mode 100644
index 0000000000..31c01c20ee
--- /dev/null
+++ b/lib/line-index/src/tests.rs

@@ -0,0 +1,11 @@

+use super::LineIndex;

+#[test]

+fn test_empty_index() {

+ let col_index = LineIndex::new(

+ "

+const C: char = 'x';

+",

+ );

+ assert_eq!(col_index.line_wide_chars.len(), 0);

diff --git a/lib/line-index/tests/it.rs b/lib/line-index/tests/it.rs
new file mode 100644
index 0000000000..ce1c0bc6f1
--- /dev/null
+++ b/lib/line-index/tests/it.rs

@@ -0,0 +1,62 @@

+use line_index::{LineCol, LineIndex, TextRange};

+#[test]

+fn test_line_index() {

+ let text = "hello\nworld";

+ let table = [

+ (00, 0, 0),

+ (01, 0, 1),

+ (05, 0, 5),

+ (06, 1, 0),

+ (07, 1, 1),

+ (08, 1, 2),

+ (10, 1, 4),

+ (11, 1, 5),

+ ];

+ let index = LineIndex::new(text);

+ for (offset, line, col) in table {

+ assert_eq!(index.line_col(offset.into()), LineCol { line, col });

+ }

+ let text = "\nhello\nworld";

+ let table = [(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)];

+ let index = LineIndex::new(text);

+ for (offset, line, col) in table {

+ assert_eq!(index.line_col(offset.into()), LineCol { line, col });

+ }

+#[test]

+fn test_char_len() {

+ assert_eq!('メ'.len_utf8(), 3);

+ assert_eq!('メ'.len_utf16(), 1);

+#[test]

+fn test_splitlines() {

+ fn r(lo: u32, hi: u32) -> TextRange {

+ TextRange::new(lo.into(), hi.into())

+ }

+ let text = "a\nbb\nccc\n";

+ let line_index = LineIndex::new(text);

+ let actual = line_index.lines(r(0, 9)).collect::<Vec<_>>();

+ let expected = vec![r(0, 2), r(2, 5), r(5, 9)];

+ assert_eq!(actual, expected);

+ let text = "";

+ let line_index = LineIndex::new(text);

+ let actual = line_index.lines(r(0, 0)).collect::<Vec<_>>();

+ let expected = vec![];

+ assert_eq!(actual, expected);

+ let text = "\n";

+ let line_index = LineIndex::new(text);

+ let actual = line_index.lines(r(0, 1)).collect::<Vec<_>>();

+ let expected = vec![r(0, 1)];

+ assert_eq!(actual, expected)