Unnamed repository; edit this file 'description' to name the repository.
feat(cairo): update tree-sitter grammar and queries (#10919)
* feat(cairo): update tree-sitter grammar and queries * fix suggestions
Lucas @ StarkWare 2024-06-12
parent 8a549b7 · commit 9123d3f
-rw-r--r--languages.toml5
-rw-r--r--runtime/queries/cairo/highlights.scm363
-rw-r--r--runtime/queries/cairo/indents.scm119
-rw-r--r--runtime/queries/cairo/injections.scm2
-rw-r--r--runtime/queries/cairo/locals.scm26
-rw-r--r--runtime/queries/cairo/textobjects.scm74
6 files changed, 583 insertions, 6 deletions
diff --git a/languages.toml b/languages.toml
index 1888e8f4..436937cd 100644
--- a/languages.toml
+++ b/languages.toml
@@ -2074,9 +2074,12 @@ file-types = ["cairo"]
comment-token = "//"
indent = { tab-width = 4, unit = " " }
# auto-format = true
-grammar = "rust"
language-servers = [ "cairo-language-server" ]
+[[grammar]]
+name = "cairo"
+source = { git = "https://github.com/starkware-libs/tree-sitter-cairo", rev = "0596baab741ffacdc65c761d5d5ffbbeae97f033" }
+
[[language]]
name = "cpon"
scope = "scope.cpon"
diff --git a/runtime/queries/cairo/highlights.scm b/runtime/queries/cairo/highlights.scm
index ae55c7fa..d2cabd1c 100644
--- a/runtime/queries/cairo/highlights.scm
+++ b/runtime/queries/cairo/highlights.scm
@@ -1 +1,362 @@
-; inherits: rust
+; -------
+; Tree-Sitter doesn't allow overrides with regard to captures,
+; though it is possible to affect the child node of a captured
+; node. Thus, the approach here is to flip the order so that
+; overrides are unnecessary.
+; -------
+
+; -------
+; Types
+; -------
+
+(type_parameters
+ (type_identifier) @type.parameter)
+(constrained_type_parameter
+ left: (type_identifier) @type.parameter)
+
+; ---
+; Primitives
+; ---
+
+(primitive_type) @type.builtin
+(boolean_literal) @constant.builtin.boolean
+(numeric_literal) @constant.numeric.integer
+[
+ (string_literal)
+ (shortstring_literal)
+] @string
+[
+ (line_comment)
+] @comment
+
+; ---
+; Extraneous
+; ---
+
+(enum_variant (identifier) @type.enum.variant)
+
+(field_initializer
+ (field_identifier) @variable.other.member)
+(shorthand_field_initializer
+ (identifier) @variable.other.member)
+(shorthand_field_identifier) @variable.other.member
+
+
+; ---
+; Punctuation
+; ---
+
+[
+ "::"
+ "."
+ ";"
+ ","
+] @punctuation.delimiter
+
+[
+ "("
+ ")"
+ "["
+ "]"
+ "{"
+ "}"
+] @punctuation.bracket
+(type_arguments
+ [
+ "<"
+ ">"
+ ] @punctuation.bracket)
+(type_parameters
+ [
+ "<"
+ ">"
+ ] @punctuation.bracket)
+
+; ---
+; Variables
+; ---
+
+(let_declaration
+ pattern: [
+ ((identifier) @variable)
+ ((tuple_pattern
+ (identifier) @variable))
+ ])
+
+; It needs to be anonymous to not conflict with `call_expression` further below.
+(_
+ value: (field_expression
+ value: (identifier)? @variable
+ field: (field_identifier) @variable.other.member))
+
+(parameter
+ pattern: (identifier) @variable.parameter)
+
+; -------
+; Keywords
+; -------
+[
+ "match"
+ "if"
+ "else"
+] @keyword.control.conditional
+
+[
+ "while"
+ "loop"
+] @keyword.control.repeat
+
+[
+ "break"
+ "continue"
+ "return"
+] @keyword.control.return
+
+"use" @keyword.control.import
+(mod_item "mod" @keyword.control.import !body)
+(use_as_clause "as" @keyword.control.import)
+
+
+[
+ (crate)
+ (super)
+ "as"
+ "pub"
+ "mod"
+ (extern)
+ (nopanic)
+
+ "impl"
+ "trait"
+ "of"
+
+ "default"
+] @keyword
+
+[
+ "struct"
+ "enum"
+ "type"
+] @keyword.storage.type
+
+"let" @keyword.storage
+"fn" @keyword.function
+
+(mutable_specifier) @keyword.storage.modifier.mut
+(ref_specifier) @keyword.storage.modifier.ref
+
+(snapshot_type "@" @keyword.storage.modifier.ref)
+
+[
+ "const"
+ "ref"
+] @keyword.storage.modifier
+
+; TODO: variable.mut to highlight mutable identifiers via locals.scm
+
+; -------
+; Constructors
+; -------
+; TODO: this is largely guesswork, remove it once we get actual info from locals.scm or r-a
+
+(struct_expression
+ name: (type_identifier) @constructor)
+
+(tuple_enum_pattern
+ type: [
+ (identifier) @constructor
+ (scoped_identifier
+ name: (identifier) @constructor)
+ ])
+(struct_pattern
+ type: [
+ ((type_identifier) @constructor)
+ (scoped_type_identifier
+ name: (type_identifier) @constructor)
+ ])
+(match_pattern
+ ((identifier) @constructor) (#match? @constructor "^[A-Z]"))
+(or_pattern
+ ((identifier) @constructor)
+ ((identifier) @constructor)
+ (#match? @constructor "^[A-Z]"))
+
+; -------
+; Guess Other Types
+; -------
+
+((identifier) @constant
+ (#match? @constant "^[A-Z][A-Z\\d_]*$"))
+
+; ---
+; PascalCase identifiers in call_expressions (e.g. `Ok()`)
+; are assumed to be enum constructors.
+; ---
+
+(call_expression
+ function: [
+ ((identifier) @constructor
+ (#match? @constructor "^[A-Z]"))
+ (scoped_identifier
+ name: ((identifier) @constructor
+ (#match? @constructor "^[A-Z]")))
+ ])
+
+; ---
+; PascalCase identifiers under a path which is also PascalCase
+; are assumed to be constructors if they have methods or fields.
+; ---
+
+(field_expression
+ value: (scoped_identifier
+ path: [
+ (identifier) @type
+ (scoped_identifier
+ name: (identifier) @type)
+ ]
+ name: (identifier) @constructor
+ (#match? @type "^[A-Z]")
+ (#match? @constructor "^[A-Z]")))
+
+; ---
+; Other PascalCase identifiers are assumed to be structs.
+; ---
+
+((identifier) @type
+ (#match? @type "^[A-Z]"))
+
+; -------
+; Functions
+; -------
+
+(call_expression
+ function: [
+ ((identifier) @function)
+ (scoped_identifier
+ name: (identifier) @function)
+ (field_expression
+ field: (field_identifier) @function)
+ ])
+(generic_function
+ function: [
+ ((identifier) @function)
+ (scoped_identifier
+ name: (identifier) @function)
+ (field_expression
+ field: (field_identifier) @function.method)
+ ])
+(function_item
+ (function
+ name: (identifier) @function))
+
+(function_signature_item
+ (function
+ name: (identifier) @function))
+
+(external_function_item
+ (function
+ name: (identifier) @function))
+
+; ---
+; Macros
+; ---
+
+(attribute
+ (identifier) @special
+ arguments: (token_tree (identifier) @type)
+ (#eq? @special "derive")
+)
+
+(attribute ; NOTE(review): this pattern looks subsumed by the next one (same identifier capture) - confirm before removing
+ (identifier) @function.macro)
+(attribute
+ [
+ (identifier) @function.macro
+ (scoped_identifier
+ name: (identifier) @function.macro)
+ ]
+ (token_tree (identifier) @function.macro)?)
+
+(inner_attribute_item) @attribute
+
+(macro_invocation
+ macro: [
+ ((identifier) @function.macro)
+ (scoped_identifier
+ name: (identifier) @function.macro)
+ ]
+ "!" @function.macro)
+
+
+; -------
+; Operators
+; -------
+
+[
+ "*"
+ "->"
+ "=>"
+ "<="
+ "="
+ "=="
+ "!"
+ "!="
+ "%"
+ "%="
+ "@"
+ "&&"
+ "|"
+ "||"
+ "^"
+ "*" ; NOTE(review): duplicate of the "*" entry at the top of this list; redundant
+ "*="
+ "-"
+ "-="
+ "+"
+ "+="
+ "/"
+ "/="
+ ">"
+ "<"
+ ">="
+ ">>"
+ "<<"
+] @operator
+
+; -------
+; Paths
+; -------
+
+(use_declaration
+ argument: (identifier) @namespace)
+(use_wildcard
+ (identifier) @namespace)
+(mod_item
+ name: (identifier) @namespace)
+(scoped_use_list
+ path: (identifier)? @namespace)
+(use_list
+ (identifier) @namespace)
+(use_as_clause
+ path: (identifier)? @namespace
+ alias: (identifier) @namespace)
+
+; ---
+; Remaining Paths
+; ---
+
+(scoped_identifier
+ path: (identifier)? @namespace
+ name: (identifier) @namespace)
+(scoped_type_identifier
+ path: (identifier) @namespace)
+
+; -------
+; Remaining Identifiers
+; -------
+
+"?" @special
+
+(type_identifier) @type
+(identifier) @variable
+(field_identifier) @variable.other.member
diff --git a/runtime/queries/cairo/indents.scm b/runtime/queries/cairo/indents.scm
index ae55c7fa..35c16242 100644
--- a/runtime/queries/cairo/indents.scm
+++ b/runtime/queries/cairo/indents.scm
@@ -1 +1,118 @@
-; inherits: rust
+[
+ (use_list)
+ (block)
+ (match_block)
+ (arguments)
+ (parameters)
+ (declaration_list)
+ (field_declaration_list)
+ (field_initializer_list)
+ (struct_pattern)
+ (tuple_pattern)
+ (unit_expression)
+ (enum_variant_list)
+ (call_expression)
+ (binary_expression)
+ (field_expression)
+ (tuple_expression)
+ (array_expression)
+
+ (token_tree)
+] @indent
+
+[
+ "}"
+ "]"
+ ")"
+] @outdent
+
+; Indent the right side of assignments.
+; The #not-same-line? predicate is required to prevent an extra indent for e.g.
+; an else-clause where the previous if-clause starts on the same line as the assignment.
+(assignment_expression
+ .
+ (_) @expr-start
+ right: (_) @indent
+ (#not-same-line? @indent @expr-start)
+ (#set! "scope" "all")
+)
+(compound_assignment_expr
+ .
+ (_) @expr-start
+ right: (_) @indent
+ (#not-same-line? @indent @expr-start)
+ (#set! "scope" "all")
+)
+(let_declaration
+ "let" @expr-start
+ value: (_) @indent
+ alternative: (_)? @indent
+ (#not-same-line? @indent @expr-start)
+ (#set! "scope" "all")
+)
+(let_condition
+ .
+ (_) @expr-start
+ value: (_) @indent
+ (#not-same-line? @indent @expr-start)
+ (#set! "scope" "all")
+)
+(if_expression
+ .
+ (_) @expr-start
+ condition: (_) @indent
+ (#not-same-line? @indent @expr-start)
+ (#set! "scope" "all")
+)
+(field_pattern
+ .
+ (_) @expr-start
+ pattern: (_) @indent
+ (#not-same-line? @indent @expr-start)
+ (#set! "scope" "all")
+)
+; Indent type aliases that span multiple lines, similar to
+; regular assignment expressions
+(type_item
+ .
+ (_) @expr-start
+ type: (_) @indent
+ (#not-same-line? @indent @expr-start)
+ (#set! "scope" "all")
+)
+
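+; Some field expressions where the left part is a multiline expression are not
+; indented by cargo fmt (NOTE: wording presumably carried over from the Rust queries; confirm Cairo's formatter behaves the same).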
+; Because this multiline expression might be nested in an arbitrary number of
+; field expressions, this can only be matched using a Regex.
+(field_expression
+ value: (_) @val
+ "." @outdent
+ ; Check whether the first line ends with `(`, `{` or `[` (up to whitespace).
+ (#match? @val "(\\A[^\\n\\r]+(\\(|\\{|\\[)[\\t ]*(\\n|\\r))")
+)
+; Same as above, but with an additional `call_expression`. This is required since otherwise
+; the arguments of the function call won't be outdented.
+(call_expression
+ function: (field_expression
+ value: (_) @val
+ "." @outdent
+ (#match? @val "(\\A[^\\n\\r]+(\\(|\\{|\\[)[\\t ]*(\\n|\\r))")
+ )
+ arguments: (_) @outdent
+)
+
+
+; Indent if guards in patterns.
+; Since the tree-sitter grammar doesn't create a node for the if expression,
+; it's not possible to do this correctly in all cases. Indenting the tail of the
+; whole pattern whenever it contains an `if` only fails if the `if` appears after
+; the second line of the pattern (which should only rarely be the case)
+(match_pattern
+ .
+ (_) @expr-start
+ "if" @pattern-guard
+ (#not-same-line? @expr-start @pattern-guard)
+) @indent
+
+
diff --git a/runtime/queries/cairo/injections.scm b/runtime/queries/cairo/injections.scm
index a2358b1c..e07c83b4 100644
--- a/runtime/queries/cairo/injections.scm
+++ b/runtime/queries/cairo/injections.scm
@@ -1,3 +1,3 @@
-([(line_comment) (block_comment)] @injection.content
+([(line_comment)] @injection.content
(#set! injection.language "comment"))
diff --git a/runtime/queries/cairo/locals.scm b/runtime/queries/cairo/locals.scm
index ae55c7fa..35acb55c 100644
--- a/runtime/queries/cairo/locals.scm
+++ b/runtime/queries/cairo/locals.scm
@@ -1 +1,25 @@
-; inherits: rust
+; Scopes
+
+[
+ (function_item)
+ (struct_item)
+ (enum_item)
+ (type_item)
+ (trait_item)
+ (impl_item)
+ (block)
+] @local.scope
+
+; Definitions
+
+(parameter
+ (identifier) @local.definition)
+
+(type_parameters
+ (type_identifier) @local.definition)
+(constrained_type_parameter
+ left: (type_identifier) @local.definition)
+
+; References
+(identifier) @local.reference
+(type_identifier) @local.reference
diff --git a/runtime/queries/cairo/textobjects.scm b/runtime/queries/cairo/textobjects.scm
index ae55c7fa..4031873d 100644
--- a/runtime/queries/cairo/textobjects.scm
+++ b/runtime/queries/cairo/textobjects.scm
@@ -1 +1,73 @@
-; inherits: rust
+(function_item
+ body: (_) @function.inside) @function.around
+
+(struct_item
+ body: (_) @class.inside) @class.around
+
+(enum_item
+ body: (_) @class.inside) @class.around
+
+(trait_item
+ body: (_) @class.inside) @class.around
+
+(impl_item
+ body: (_) @class.inside) @class.around
+
+(parameters
+ ((_) @parameter.inside . ","? @parameter.around) @parameter.around)
+
+(type_parameters
+ ((_) @parameter.inside . ","? @parameter.around) @parameter.around)
+
+(type_arguments
+ ((_) @parameter.inside . ","? @parameter.around) @parameter.around)
+
+(arguments
+ ((_) @parameter.inside . ","? @parameter.around) @parameter.around)
+
+(field_initializer_list
+ ((_) @parameter.inside . ","? @parameter.around) @parameter.around)
+
+[
+ (line_comment)
+] @comment.inside
+
+(line_comment)+ @comment.around
+
+(; #[test]
+ (attribute_item
+ (attribute
+ (identifier) @_test_attribute))
+ ; allow other attributes like #[should_panic] and comments
+ [
+ (attribute_item)
+ (line_comment)
+ ]*
+ ; the test function
+ (function_item
+ body: (_) @test.inside) @test.around
+ (#eq? @_test_attribute "test"))
+
+(array_expression
+ (_) @entry.around)
+
+(tuple_expression
+ (_) @entry.around)
+
+(tuple_pattern
+ (_) @entry.around)
+
+; The commonly used `array!` macro initializer is special-cased
+(macro_invocation
+ (identifier) @_id (token_tree (_) @entry.around)
+ (#eq? @_id "array"))
+
+(enum_variant) @entry.around
+
+(field_declaration
+ (_) @entry.inside) @entry.around
+
+(field_initializer
+ (_) @entry.inside) @entry.around
+
+(shorthand_field_initializer) @entry.around