helix

Unnamed repository; edit this file 'description' to name the repository.

master 24Branches 30Tags

Clone

HTTPS

SSH

Open with VS Code

Diffstat (limited to 'helix-view/src/handlers/word_index.rs')

-rw-r--r--

helix-view/src/handlers/word_index.rs

509

1 files changed, 0 insertions, 509 deletions

diff --git a/helix-view/src/handlers/word_index.rs b/helix-view/src/handlers/word_index.rs
deleted file mode 100644
index 9c3f8338..00000000
--- a/helix-view/src/handlers/word_index.rs
+++ /dev/null

@@ -1,509 +0,0 @@

-//! Indexing of words from open buffers.

-//!

-//! This provides an eventually consistent set of words used in any open buffers. This set is

-//! later used for lexical completion.

-use std::{borrow::Cow, collections::HashMap, iter, mem, sync::Arc, time::Duration};

-use helix_core::{

- chars::char_is_word, fuzzy::fuzzy_match, movement, ChangeSet, Range, Rope, RopeSlice,

-};

-use helix_event::{register_hook, AsyncHook};

-use helix_stdx::rope::RopeSliceExt as _;

-use parking_lot::RwLock;

-use tokio::{sync::mpsc, time::Instant};

-use crate::{

- events::{ConfigDidChange, DocumentDidChange, DocumentDidClose, DocumentDidOpen},

- DocumentId,

-};

-use super::Handlers;

-#[derive(Debug)]

-struct Change {

- old_text: Rope,

- text: Rope,

- changes: ChangeSet,

-#[derive(Debug)]

-enum Event {

- Insert(Rope),

- Update(DocumentId, Change),

- Delete(DocumentId, Rope),

- /// Clear the entire word index.

- /// This is used to clear memory when the feature is turned off.

- Clear,

-#[derive(Debug)]

-pub struct Handler {

- pub(super) index: WordIndex,

- /// A sender into an async hook which debounces updates to the index.

- hook: mpsc::Sender<Event>,

- /// A sender to a tokio task which coordinates the indexing of documents.

- ///

- /// See [WordIndex::run]. A supervisor-like task is in charge of spawning tasks to update the

- /// index. This ensures that consecutive edits to a document trigger the correct order of

- /// insertions and deletions into the word set.

- coordinator: mpsc::UnboundedSender<Event>,

-impl Handler {

- pub fn spawn() -> Self {

- let index = WordIndex::default();

- let (tx, rx) = mpsc::unbounded_channel();

- tokio::spawn(index.clone().run(rx));

- Self {

- hook: Hook {

- changes: HashMap::default(),

- coordinator: tx.clone(),

- }

- .spawn(),

- index,

- coordinator: tx,

- }

-#[derive(Debug)]

-struct Hook {

- changes: HashMap<DocumentId, Change>,

- coordinator: mpsc::UnboundedSender<Event>,

-const DEBOUNCE: Duration = Duration::from_secs(1);

-impl AsyncHook for Hook {

- type Event = Event;

- fn handle_event(&mut self, event: Self::Event, timeout: Option<Instant>) -> Option<Instant> {

- match event {

- Event::Insert(_) => unreachable!("inserts are sent to the worker directly"),

- Event::Update(doc, change) => {

- if let Some(pending_change) = self.changes.get_mut(&doc) {

- // If there is already a change waiting for this document, merge the two

- // changes together by composing the changesets and saving the new `text`.

- pending_change.changes =

- mem::take(&mut pending_change.changes).compose(change.changes);

- pending_change.text = change.text;

- Some(Instant::now() + DEBOUNCE)

- } else if !is_changeset_significant(&change.changes) {

- // If the changeset is fairly large, debounce before updating the index.

- self.changes.insert(doc, change);

- Some(Instant::now() + DEBOUNCE)

- } else {

- // Otherwise if the change is small, queue the update to the index immediately.

- self.coordinator.send(Event::Update(doc, change)).unwrap();

- timeout

- }

- Event::Delete(doc, text) => {

- // If there are pending changes that haven't been indexed since the last debounce,

- // forget them and delete the old text.

- if let Some(change) = self.changes.remove(&doc) {

- self.coordinator

- .send(Event::Delete(doc, change.old_text))

- .unwrap();

- } else {

- self.coordinator.send(Event::Delete(doc, text)).unwrap();

- }

- timeout

- }

- Event::Clear => unreachable!("clear is sent to the worker directly"),

- }

- fn finish_debounce(&mut self) {

- for (doc, change) in self.changes.drain() {

- self.coordinator.send(Event::Update(doc, change)).unwrap();

- }

-/// Minimum number of grapheme clusters required to include a word in the index

-const MIN_WORD_GRAPHEMES: usize = 3;

-/// Maximum word length allowed (in chars)

-const MAX_WORD_LEN: usize = 50;

-type Word = kstring::KString;

-#[derive(Debug, Default)]

-struct WordIndexInner {

- /// Reference counted storage for words.

- ///

- /// Words are very likely to be reused many times. Instead of storing duplicates we keep a

- /// reference count of times a word is used. When the reference count drops to zero the word

- /// is removed from the index.

- words: HashMap<Word, u32>,

-impl WordIndexInner {

- fn words(&self) -> impl Iterator<Item = &Word> {

- self.words.keys()

- }

- fn insert(&mut self, word: RopeSlice) {

- let word: Cow<str> = word.into();

- if let Some(rc) = self.words.get_mut(word.as_ref()) {

- *rc = rc.saturating_add(1);

- } else {

- let word = match word {

- Cow::Owned(s) => Word::from_string(s),

- Cow::Borrowed(s) => Word::from_ref(s),

- };

- self.words.insert(word, 1);

- }

- fn remove(&mut self, word: RopeSlice) {

- let word: Cow<str> = word.into();

- match self.words.get_mut(word.as_ref()) {

- Some(1) => {

- self.words.remove(word.as_ref());

- }

- Some(n) => *n -= 1,

- None => (),

- }

- fn clear(&mut self) {

- std::mem::take(&mut self.words);

- }

-#[derive(Debug, Default, Clone)]

-pub struct WordIndex {

- inner: Arc<RwLock<WordIndexInner>>,

-impl WordIndex {

- pub fn matches(&self, pattern: &str) -> Vec<String> {

- let inner = self.inner.read();

- let mut matches = fuzzy_match(pattern, inner.words(), false);

- matches.sort_unstable_by_key(|(_, score)| *score);

- matches

- .into_iter()

- .map(|(word, _)| word.to_string())

- .collect()

- }

- fn add_document(&self, text: &Rope) {

- let mut inner = self.inner.write();

- for word in words(text.slice(..)) {

- inner.insert(word);

- }

- fn update_document(&self, old_text: &Rope, text: &Rope, changes: &ChangeSet) {

- let mut inner = self.inner.write();

- for (old_window, new_window) in changed_windows(old_text.slice(..), text.slice(..), changes)

- {

- for word in words(new_window) {

- inner.insert(word);

- }

- for word in words(old_window) {

- inner.remove(word);

- }

- fn remove_document(&self, text: &Rope) {

- let mut inner = self.inner.write();

- for word in words(text.slice(..)) {

- inner.remove(word);

- }

- fn clear(&self) {

- let mut inner = self.inner.write();

- inner.clear();

- }

- /// Coordinate the indexing of documents.

- ///

- /// This task wraps a MPSC queue and spawns blocking tasks which update the index. Updates

- /// are applied one-by-one to ensure that changes to the index are **serialized**:

- /// updates to each document must be applied in-order.

- async fn run(self, mut events: mpsc::UnboundedReceiver<Event>) {

- while let Some(event) = events.recv().await {

- let this = self.clone();

- tokio::task::spawn_blocking(move || match event {

- Event::Insert(text) => {

- this.add_document(&text);

- }

- Event::Update(

- _doc,

- Change {

- old_text,

- text,

- changes,

- ..

- },

- ) => {

- this.update_document(&old_text, &text, &changes);

- }

- Event::Delete(_doc, text) => {

- this.remove_document(&text);

- }

- Event::Clear => {

- this.clear();

- }

- })

- .await

- .unwrap();

- }

-fn words(text: RopeSlice) -> impl Iterator<Item = RopeSlice> {

- let mut cursor = Range::point(0);

- if text

- .get_char(cursor.anchor)

- .is_some_and(|ch| !ch.is_whitespace())

- {

- let cursor_word_end = movement::move_next_word_end(text, cursor, 1);

- if cursor_word_end.anchor == 0 {

- cursor = cursor_word_end;

- }

- iter::from_fn(move || {

- while cursor.head <= text.len_chars() {

- let mut word = None;

- if text

- .slice(..cursor.head)

- .graphemes_rev()

- .take(MIN_WORD_GRAPHEMES)

- .take_while(|g| g.chars().all(char_is_word))

- .count()

- == MIN_WORD_GRAPHEMES

- {

- cursor.anchor += text

- .chars_at(cursor.anchor)

- .take_while(|&c| !char_is_word(c))

- .count();

- let slice = cursor.slice(text);

- if slice.len_chars() <= MAX_WORD_LEN {

- word = Some(slice);

- }

- let head = cursor.head;

- cursor = movement::move_next_word_end(text, cursor, 1);

- if cursor.head == head {

- cursor.head = usize::MAX;

- }

- if word.is_some() {

- return word;

- }

- None

- })

-/// Finds areas of the old and new texts around each operation in `changes`.

-///

-/// The window is larger than the changed area and can encompass multiple insert/delete operations

-/// if they are grouped closely together.

-///

-/// The ranges of the old and new text should usually be of different sizes. For example a

-/// deletion of "foo" surrounded by large retain sections would give a longer window into the

-/// `old_text` and shorter window of `new_text`. Vice-versa for an insertion. A full replacement

-/// of a word though would give two slices of the same size.

-fn changed_windows<'a>(

- old_text: RopeSlice<'a>,

- new_text: RopeSlice<'a>,

- changes: &'a ChangeSet,

-) -> impl Iterator<Item = (RopeSlice<'a>, RopeSlice<'a>)> {

- use helix_core::Operation::*;

- let mut operations = changes.changes().iter().peekable();

- let mut old_pos = 0;

- let mut new_pos = 0;

- iter::from_fn(move || loop {

- let operation = operations.next()?;

- let old_start = old_pos;

- let new_start = new_pos;

- let len = operation.len_chars();

- match operation {

- Retain(_) => {

- old_pos += len;

- new_pos += len;

- continue;

- }

- Insert(_) => new_pos += len,

- Delete(_) => old_pos += len,

- }

- // Scan ahead until a `Retain` is found which would end a window.

- while let Some(o) = operations.next_if(|op| !matches!(op, Retain(n) if *n > MAX_WORD_LEN)) {

- let len = o.len_chars();

- match o {

- Retain(_) => {

- old_pos += len;

- new_pos += len;

- }

- Delete(_) => old_pos += len,

- Insert(_) => new_pos += len,

- }

- let old_window = old_start.saturating_sub(MAX_WORD_LEN)

- ..(old_pos + MAX_WORD_LEN).min(old_text.len_chars());

- let new_window = new_start.saturating_sub(MAX_WORD_LEN)

- ..(new_pos + MAX_WORD_LEN).min(new_text.len_chars());

- return Some((old_text.slice(old_window), new_text.slice(new_window)));

- })

-/// Estimates whether a changeset is significant or small.

-fn is_changeset_significant(changes: &ChangeSet) -> bool {

- use helix_core::Operation::*;

- let mut diff = 0;

- for operation in changes.changes() {

- match operation {

- Retain(_) => continue,

- Delete(_) | Insert(_) => diff += operation.len_chars(),

- }

- // This is arbitrary and could be tuned further:

- diff > 1_000

-pub(crate) fn register_hooks(handlers: &Handlers) {

- let coordinator = handlers.word_index.coordinator.clone();

- register_hook!(move |event: &mut DocumentDidOpen<'_>| {

- let doc = doc!(event.editor, &event.doc);

- if doc.word_completion_enabled() {

- coordinator.send(Event::Insert(doc.text().clone())).unwrap();

- }

- Ok(())

- });

- let tx = handlers.word_index.hook.clone();

- register_hook!(move |event: &mut DocumentDidChange<'_>| {

- if !event.ghost_transaction && event.doc.word_completion_enabled() {

- helix_event::send_blocking(

- &tx,

- Event::Update(

- event.doc.id(),

- Change {

- old_text: event.old_text.clone(),

- text: event.doc.text().clone(),

- changes: event.changes.clone(),

- },

- ),

- );

- }

- Ok(())

- });

- let tx = handlers.word_index.hook.clone();

- register_hook!(move |event: &mut DocumentDidClose<'_>| {

- if event.doc.word_completion_enabled() {

- helix_event::send_blocking(

- &tx,

- Event::Delete(event.doc.id(), event.doc.text().clone()),

- );

- }

- Ok(())

- });

- let coordinator = handlers.word_index.coordinator.clone();

- register_hook!(move |event: &mut ConfigDidChange<'_>| {

- // The feature has been turned off. Clear the index and reclaim any used memory.

- if event.old.word_completion.enable && !event.new.word_completion.enable {

- coordinator.send(Event::Clear).unwrap();

- }

- // The feature has been turned on. Index open documents.

- if !event.old.word_completion.enable && event.new.word_completion.enable {

- for doc in event.editor.documents() {

- if doc.word_completion_enabled() {

- coordinator.send(Event::Insert(doc.text().clone())).unwrap();

- }

- Ok(())

- });

-#[cfg(test)]

-mod tests {

- use std::collections::HashSet;

- use super::*;

- use helix_core::diff::compare_ropes;

- impl WordIndex {

- fn words(&self) -> HashSet<String> {

- let inner = self.inner.read();

- inner.words().map(|w| w.to_string()).collect()

- }

- #[track_caller]

- fn assert_words<I: ToString, T: IntoIterator<Item = I>>(text: &str, expected: T) {

- let text = Rope::from_str(text);

- let index = WordIndex::default();

- index.add_document(&text);

- let actual = index.words();

- let expected: HashSet<_> = expected.into_iter().map(|i| i.to_string()).collect();

- assert_eq!(expected, actual);

- }

- #[test]

- fn parse() {

- assert_words("one two three", ["one", "two", "three"]);

- assert_words("a foo c", ["foo"]);

- }

- #[track_caller]

- fn assert_diff<S, R, I>(before: &str, after: &str, expect_removed: R, expect_inserted: I)

- where

- S: ToString,

- R: IntoIterator<Item = S>,

- I: IntoIterator<Item = S>,

- {

- let before = Rope::from_str(before);

- let after = Rope::from_str(after);

- let diff = compare_ropes(&before, &after);

- let expect_removed: HashSet<_> =

- expect_removed.into_iter().map(|i| i.to_string()).collect();

- let expect_inserted: HashSet<_> =

- expect_inserted.into_iter().map(|i| i.to_string()).collect();

- let index = WordIndex::default();

- index.add_document(&before);

- let words_before = index.words();

- index.update_document(&before, &after, diff.changes());

- let words_after = index.words();

- let actual_removed = words_before.difference(&words_after).cloned().collect();

- let actual_inserted = words_after.difference(&words_before).cloned().collect();

- eprintln!("\"{before}\" {words_before:?} => \"{after}\" {words_after:?}");

- assert_eq!(

- expect_removed, actual_removed,

- "expected {expect_removed:?} to be removed, instead {actual_removed:?} was"

- );

- assert_eq!(

- expect_inserted, actual_inserted,

- "expected {expect_inserted:?} to be inserted, instead {actual_inserted:?} was"

- );

- }

- #[test]

- fn diff() {

- assert_diff("one two three", "one five three", ["two"], ["five"]);

- assert_diff("one two three", "one to three", ["two"], []);

- assert_diff("one two three", "one three", ["two"], []);

- assert_diff("one two three", "one t{o three", ["two"], []);

- assert_diff("one foo three", "one fooo three", ["foo"], ["fooo"]);

- }