-rw-r--r--.gitignore3
-rw-r--r--Cargo.toml10
-rw-r--r--LICENSE373
-rw-r--r--src/lib.rs715
-rw-r--r--src/lib_old.rs424
-rw-r--r--src/recognizer.rs0
-rw-r--r--src/utf8.rs108
7 files changed, 1633 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6936990
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/target
+**/*.rs.bk
+Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..a839dd5
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "ctlfun"
+version = "0.1.0"
+authors = ["Luna Catkins <[email protected]>"]
+edition = "2018"
+license = "MPL-2.0"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..a612ad9
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,373 @@
+Mozilla Public License Version 2.0
+==================================
+
+1. Definitions
+--------------
+
+1.1. "Contributor"
+ means each individual or legal entity that creates, contributes to
+ the creation of, or owns Covered Software.
+
+1.2. "Contributor Version"
+ means the combination of the Contributions of others (if any) used
+ by a Contributor and that particular Contributor's Contribution.
+
+1.3. "Contribution"
+ means Covered Software of a particular Contributor.
+
+1.4. "Covered Software"
+ means Source Code Form to which the initial Contributor has attached
+ the notice in Exhibit A, the Executable Form of such Source Code
+ Form, and Modifications of such Source Code Form, in each case
+ including portions thereof.
+
+1.5. "Incompatible With Secondary Licenses"
+ means
+
+ (a) that the initial Contributor has attached the notice described
+ in Exhibit B to the Covered Software; or
+
+ (b) that the Covered Software was made available under the terms of
+ version 1.1 or earlier of the License, but not also under the
+ terms of a Secondary License.
+
+1.6. "Executable Form"
+ means any form of the work other than Source Code Form.
+
+1.7. "Larger Work"
+ means a work that combines Covered Software with other material, in
+ a separate file or files, that is not Covered Software.
+
+1.8. "License"
+ means this document.
+
+1.9. "Licensable"
+ means having the right to grant, to the maximum extent possible,
+ whether at the time of the initial grant or subsequently, any and
+ all of the rights conveyed by this License.
+
+1.10. "Modifications"
+ means any of the following:
+
+ (a) any file in Source Code Form that results from an addition to,
+ deletion from, or modification of the contents of Covered
+ Software; or
+
+ (b) any new file in Source Code Form that contains any Covered
+ Software.
+
+1.11. "Patent Claims" of a Contributor
+ means any patent claim(s), including without limitation, method,
+ process, and apparatus claims, in any patent Licensable by such
+ Contributor that would be infringed, but for the grant of the
+ License, by the making, using, selling, offering for sale, having
+ made, import, or transfer of either its Contributions or its
+ Contributor Version.
+
+1.12. "Secondary License"
+ means either the GNU General Public License, Version 2.0, the GNU
+ Lesser General Public License, Version 2.1, the GNU Affero General
+ Public License, Version 3.0, or any later versions of those
+ licenses.
+
+1.13. "Source Code Form"
+ means the form of the work preferred for making modifications.
+
+1.14. "You" (or "Your")
+ means an individual or a legal entity exercising rights under this
+ License. For legal entities, "You" includes any entity that
+ controls, is controlled by, or is under common control with You. For
+ purposes of this definition, "control" means (a) the power, direct
+ or indirect, to cause the direction or management of such entity,
+ whether by contract or otherwise, or (b) ownership of more than
+ fifty percent (50%) of the outstanding shares or beneficial
+ ownership of such entity.
+
+2. License Grants and Conditions
+--------------------------------
+
+2.1. Grants
+
+Each Contributor hereby grants You a world-wide, royalty-free,
+non-exclusive license:
+
+(a) under intellectual property rights (other than patent or trademark)
+ Licensable by such Contributor to use, reproduce, make available,
+ modify, display, perform, distribute, and otherwise exploit its
+ Contributions, either on an unmodified basis, with Modifications, or
+ as part of a Larger Work; and
+
+(b) under Patent Claims of such Contributor to make, use, sell, offer
+ for sale, have made, import, and otherwise transfer either its
+ Contributions or its Contributor Version.
+
+2.2. Effective Date
+
+The licenses granted in Section 2.1 with respect to any Contribution
+become effective for each Contribution on the date the Contributor first
+distributes such Contribution.
+
+2.3. Limitations on Grant Scope
+
+The licenses granted in this Section 2 are the only rights granted under
+this License. No additional rights or licenses will be implied from the
+distribution or licensing of Covered Software under this License.
+Notwithstanding Section 2.1(b) above, no patent license is granted by a
+Contributor:
+
+(a) for any code that a Contributor has removed from Covered Software;
+ or
+
+(b) for infringements caused by: (i) Your and any other third party's
+ modifications of Covered Software, or (ii) the combination of its
+ Contributions with other software (except as part of its Contributor
+ Version); or
+
+(c) under Patent Claims infringed by Covered Software in the absence of
+ its Contributions.
+
+This License does not grant any rights in the trademarks, service marks,
+or logos of any Contributor (except as may be necessary to comply with
+the notice requirements in Section 3.4).
+
+2.4. Subsequent Licenses
+
+No Contributor makes additional grants as a result of Your choice to
+distribute the Covered Software under a subsequent version of this
+License (see Section 10.2) or under the terms of a Secondary License (if
+permitted under the terms of Section 3.3).
+
+2.5. Representation
+
+Each Contributor represents that the Contributor believes its
+Contributions are its original creation(s) or it has sufficient rights
+to grant the rights to its Contributions conveyed by this License.
+
+2.6. Fair Use
+
+This License is not intended to limit any rights You have under
+applicable copyright doctrines of fair use, fair dealing, or other
+equivalents.
+
+2.7. Conditions
+
+Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
+in Section 2.1.
+
+3. Responsibilities
+-------------------
+
+3.1. Distribution of Source Form
+
+All distribution of Covered Software in Source Code Form, including any
+Modifications that You create or to which You contribute, must be under
+the terms of this License. You must inform recipients that the Source
+Code Form of the Covered Software is governed by the terms of this
+License, and how they can obtain a copy of this License. You may not
+attempt to alter or restrict the recipients' rights in the Source Code
+Form.
+
+3.2. Distribution of Executable Form
+
+If You distribute Covered Software in Executable Form then:
+
+(a) such Covered Software must also be made available in Source Code
+ Form, as described in Section 3.1, and You must inform recipients of
+ the Executable Form how they can obtain a copy of such Source Code
+ Form by reasonable means in a timely manner, at a charge no more
+ than the cost of distribution to the recipient; and
+
+(b) You may distribute such Executable Form under the terms of this
+ License, or sublicense it under different terms, provided that the
+ license for the Executable Form does not attempt to limit or alter
+ the recipients' rights in the Source Code Form under this License.
+
+3.3. Distribution of a Larger Work
+
+You may create and distribute a Larger Work under terms of Your choice,
+provided that You also comply with the requirements of this License for
+the Covered Software. If the Larger Work is a combination of Covered
+Software with a work governed by one or more Secondary Licenses, and the
+Covered Software is not Incompatible With Secondary Licenses, this
+License permits You to additionally distribute such Covered Software
+under the terms of such Secondary License(s), so that the recipient of
+the Larger Work may, at their option, further distribute the Covered
+Software under the terms of either this License or such Secondary
+License(s).
+
+3.4. Notices
+
+You may not remove or alter the substance of any license notices
+(including copyright notices, patent notices, disclaimers of warranty,
+or limitations of liability) contained within the Source Code Form of
+the Covered Software, except that You may alter any license notices to
+the extent required to remedy known factual inaccuracies.
+
+3.5. Application of Additional Terms
+
+You may choose to offer, and to charge a fee for, warranty, support,
+indemnity or liability obligations to one or more recipients of Covered
+Software. However, You may do so only on Your own behalf, and not on
+behalf of any Contributor. You must make it absolutely clear that any
+such warranty, support, indemnity, or liability obligation is offered by
+You alone, and You hereby agree to indemnify every Contributor for any
+liability incurred by such Contributor as a result of warranty, support,
+indemnity or liability terms You offer. You may include additional
+disclaimers of warranty and limitations of liability specific to any
+jurisdiction.
+
+4. Inability to Comply Due to Statute or Regulation
+---------------------------------------------------
+
+If it is impossible for You to comply with any of the terms of this
+License with respect to some or all of the Covered Software due to
+statute, judicial order, or regulation then You must: (a) comply with
+the terms of this License to the maximum extent possible; and (b)
+describe the limitations and the code they affect. Such description must
+be placed in a text file included with all distributions of the Covered
+Software under this License. Except to the extent prohibited by statute
+or regulation, such description must be sufficiently detailed for a
+recipient of ordinary skill to be able to understand it.
+
+5. Termination
+--------------
+
+5.1. The rights granted under this License will terminate automatically
+if You fail to comply with any of its terms. However, if You become
+compliant, then the rights granted under this License from a particular
+Contributor are reinstated (a) provisionally, unless and until such
+Contributor explicitly and finally terminates Your grants, and (b) on an
+ongoing basis, if such Contributor fails to notify You of the
+non-compliance by some reasonable means prior to 60 days after You have
+come back into compliance. Moreover, Your grants from a particular
+Contributor are reinstated on an ongoing basis if such Contributor
+notifies You of the non-compliance by some reasonable means, this is the
+first time You have received notice of non-compliance with this License
+from such Contributor, and You become compliant prior to 30 days after
+Your receipt of the notice.
+
+5.2. If You initiate litigation against any entity by asserting a patent
+infringement claim (excluding declaratory judgment actions,
+counter-claims, and cross-claims) alleging that a Contributor Version
+directly or indirectly infringes any patent, then the rights granted to
+You by any and all Contributors for the Covered Software under Section
+2.1 of this License shall terminate.
+
+5.3. In the event of termination under Sections 5.1 or 5.2 above, all
+end user license agreements (excluding distributors and resellers) which
+have been validly granted by You or Your distributors under this License
+prior to termination shall survive termination.
+
+************************************************************************
+* *
+* 6. Disclaimer of Warranty *
+* ------------------------- *
+* *
+* Covered Software is provided under this License on an "as is" *
+* basis, without warranty of any kind, either expressed, implied, or *
+* statutory, including, without limitation, warranties that the *
+* Covered Software is free of defects, merchantable, fit for a *
+* particular purpose or non-infringing. The entire risk as to the *
+* quality and performance of the Covered Software is with You. *
+* Should any Covered Software prove defective in any respect, You *
+* (not any Contributor) assume the cost of any necessary servicing, *
+* repair, or correction. This disclaimer of warranty constitutes an *
+* essential part of this License. No use of any Covered Software is *
+* authorized under this License except under this disclaimer. *
+* *
+************************************************************************
+
+************************************************************************
+* *
+* 7. Limitation of Liability *
+* -------------------------- *
+* *
+* Under no circumstances and under no legal theory, whether tort *
+* (including negligence), contract, or otherwise, shall any *
+* Contributor, or anyone who distributes Covered Software as *
+* permitted above, be liable to You for any direct, indirect, *
+* special, incidental, or consequential damages of any character *
+* including, without limitation, damages for lost profits, loss of *
+* goodwill, work stoppage, computer failure or malfunction, or any *
+* and all other commercial damages or losses, even if such party *
+* shall have been informed of the possibility of such damages. This *
+* limitation of liability shall not apply to liability for death or *
+* personal injury resulting from such party's negligence to the *
+* extent applicable law prohibits such limitation. Some *
+* jurisdictions do not allow the exclusion or limitation of *
+* incidental or consequential damages, so this exclusion and *
+* limitation may not apply to You. *
+* *
+************************************************************************
+
+8. Litigation
+-------------
+
+Any litigation relating to this License may be brought only in the
+courts of a jurisdiction where the defendant maintains its principal
+place of business and such litigation shall be governed by laws of that
+jurisdiction, without reference to its conflict-of-law provisions.
+Nothing in this Section shall prevent a party's ability to bring
+cross-claims or counter-claims.
+
+9. Miscellaneous
+----------------
+
+This License represents the complete agreement concerning the subject
+matter hereof. If any provision of this License is held to be
+unenforceable, such provision shall be reformed only to the extent
+necessary to make it enforceable. Any law or regulation which provides
+that the language of a contract shall be construed against the drafter
+shall not be used to construe this License against a Contributor.
+
+10. Versions of the License
+---------------------------
+
+10.1. New Versions
+
+Mozilla Foundation is the license steward. Except as provided in Section
+10.3, no one other than the license steward has the right to modify or
+publish new versions of this License. Each version will be given a
+distinguishing version number.
+
+10.2. Effect of New Versions
+
+You may distribute the Covered Software under the terms of the version
+of the License under which You originally received the Covered Software,
+or under the terms of any subsequent version published by the license
+steward.
+
+10.3. Modified Versions
+
+If you create software not governed by this License, and you want to
+create a new license for such software, you may create and use a
+modified version of this License if you rename the license and remove
+any references to the name of the license steward (except to note that
+such modified license differs from this License).
+
+10.4. Distributing Source Code Form that is Incompatible With Secondary
+Licenses
+
+If You choose to distribute Source Code Form that is Incompatible With
+Secondary Licenses under the terms of this version of the License, the
+notice described in Exhibit B of this License must be attached.
+
+Exhibit A - Source Code Form License Notice
+-------------------------------------------
+
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+If it is not possible or desirable to put the notice in a particular
+file, then You may include the notice in a location (such as a LICENSE
+file in a relevant directory) where a recipient would be likely to look
+for such a notice.
+
+You may add additional accurate notices of copyright ownership.
+
+Exhibit B - "Incompatible With Secondary Licenses" Notice
+---------------------------------------------------------
+
+ This Source Code Form is "Incompatible With Secondary Licenses", as
+ defined by the Mozilla Public License, v. 2.0.
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..4b9585d
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,715 @@
+pub mod recognizer;
+
+mod utf8;
+
+const CLASS_TABLE: [Class; 0x80] = [ // One Class per 7-bit value 0x00..=0x7F, eight entries per line. NOTE(review): presumably indexed by the input byte - the lookup code is not visible here.
+ Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 , // 0x00..=0x07
+ Class::C0S,Class::C0S,Class::C0S,Class::C0S,Class::C0S,Class::C0S,Class::C0 ,Class::C0 , // 0x08..=0x0F (0x08..=0x0D are C0S)
+
+ Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 , // 0x10..=0x17
+ Class::C0 ,Class::C0 ,Class::C0 ,Class::ESC,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 , // 0x18..=0x1F (0x1B is ESC)
+
+ Class::INT,Class::INT,Class::INT,Class::INT,Class::INT,Class::INT,Class::INT,Class::INT, // 0x20..=0x27
+ Class::INT,Class::INT,Class::INT,Class::INT,Class::INT,Class::INT,Class::INT,Class::INT, // 0x28..=0x2F
+
+ Class::PAR,Class::PAR,Class::PAR,Class::PAR,Class::PAR,Class::PAR,Class::PAR,Class::PAR, // 0x30..=0x37
+ Class::PAR,Class::PAR,Class::SEP,Class::SEP,Class::PRI,Class::PRI,Class::PRI,Class::PRI, // 0x38..=0x3F (0x3A, 0x3B are SEP; 0x3C..=0x3F are PRI)
+
+ Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 , // 0x40..=0x47
+ Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 , // 0x48..=0x4F
+
+ Class::CSO,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 , // 0x50..=0x57 (0x50 is CSO)
+ Class::SOS,Class::C1 ,Class::SCI,Class::CSI,Class::ST ,Class::CSO,Class::CSO,Class::CSO, // 0x58..=0x5F (SOS, C1, SCI, CSI, ST, then CSO for 0x5D..=0x5F)
+
+ Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF, // 0x60..=0x67
+ Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF, // 0x68..=0x6F
+
+ Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF, // 0x70..=0x77
+ Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::DEL, // 0x78..=0x7F (0x7F is DEL)
+];
+
+const STATE_TABLE: [State; 0xE0] = [ // 14 rows (one per base state) x 16 slots (the 15 Class variants in declaration order, then PAD); each entry's comment reads "current state + class". NOTE(review): presumably looked up as state base value + class discriminant - the lookup code is not visible here.
+ State::C0Control, // Ground + C0 [row starts at index 0x00]
+ State::C0Control, // Ground + C0S
+ State::StartEscape, // Ground + ESC
+ State::Char, // Ground + INT
+ State::Char, // Ground + PAR
+ State::Char, // Ground + SEP
+ State::Char, // Ground + PRI
+ State::Char, // Ground + C1
+ State::Char, // Ground + CSO
+ State::Char, // Ground + SOS
+ State::Char, // Ground + SCI
+ State::Char, // Ground + CSI
+ State::Char, // Ground + ST
+ State::Char, // Ground + ICF
+ State::Ground, // Ground + DEL
+ State::Ground, // Ground + PAD (16th slot; the Class enum has no PAD variant - presumably padding so every row is 16 wide, TODO confirm)
+
+ State::C0Control, // Escape + C0 [row starts at index 0x10]
+ State::C0Control, // Escape + C0S
+ State::StartEscape, // Escape + ESC
+ State::PushIntermediateByte, // Escape + INT
+ State::FinishSequence, // Escape + PAR
+ State::FinishSequence, // Escape + SEP
+ State::FinishSequence, // Escape + PRI
+ State::C1Control, // Escape + C1
+ State::StartCommandString, // Escape + CSO
+ State::StartCharacterString, // Escape + SOS
+ State::StartSingleCharacter, // Escape + SCI
+ State::StartControlSequence, // Escape + CSI
+ State::C1Control, // Escape + ST
+ State::FinishSequence, // Escape + ICF
+ State::Ground, // Escape + DEL
+ State::Ground, // Escape + PAD
+
+ State::ControlFunctionError, // ControlFunction + C0 [row starts at index 0x20]
+ State::ControlFunctionError, // ControlFunction + C0S
+ State::ControlFunctionError, // ControlFunction + ESC
+ State::PushIntermediateByte, // ControlFunction + INT
+ State::FinishSequence, // ControlFunction + PAR
+ State::FinishSequence, // ControlFunction + SEP
+ State::FinishSequence, // ControlFunction + PRI
+ State::FinishSequence, // ControlFunction + C1
+ State::FinishSequence, // ControlFunction + CSO
+ State::FinishSequence, // ControlFunction + SOS
+ State::FinishSequence, // ControlFunction + SCI
+ State::FinishSequence, // ControlFunction + CSI
+ State::FinishSequence, // ControlFunction + ST
+ State::FinishSequence, // ControlFunction + ICF
+ State::Ground, // ControlFunction + DEL
+ State::ControlFunctionError, // ControlFunction + PAD
+
+ State::ControlFunctionError, // ControlFunctionError + C0 [row starts at index 0x30]
+ State::ControlFunctionError, // ControlFunctionError + C0S
+ State::ControlFunctionError, // ControlFunctionError + ESC
+ State::ControlFunctionError, // ControlFunctionError + INT
+ State::Ground, // ControlFunctionError + PAR
+ State::Ground, // ControlFunctionError + SEP
+ State::Ground, // ControlFunctionError + PRI
+ State::Ground, // ControlFunctionError + C1
+ State::Ground, // ControlFunctionError + CSO
+ State::Ground, // ControlFunctionError + SOS
+ State::Ground, // ControlFunctionError + SCI
+ State::Ground, // ControlFunctionError + CSI
+ State::Ground, // ControlFunctionError + ST
+ State::Ground, // ControlFunctionError + ICF
+ State::ControlFunctionError, // ControlFunctionError + DEL
+ State::ControlFunctionError, // ControlFunctionError + PAD
+
+ State::ControlStringError, // CommandString + C0 [row starts at index 0x40]
+ State::PushCommandString, // CommandString + C0S
+ State::CommandStringEscape, // CommandString + ESC
+ State::PushCommandString, // CommandString + INT
+ State::PushCommandString, // CommandString + PAR
+ State::PushCommandString, // CommandString + SEP
+ State::PushCommandString, // CommandString + PRI
+ State::PushCommandString, // CommandString + C1
+ State::PushCommandString, // CommandString + CSO
+ State::PushCommandString, // CommandString + SOS
+ State::PushCommandString, // CommandString + SCI
+ State::PushCommandString, // CommandString + CSI
+ State::PushCommandString, // CommandString + ST
+ State::PushCommandString, // CommandString + ICF
+ State::ControlStringError, // CommandString + DEL
+ State::ControlStringError, // CommandString + PAD
+
+ State::ControlStringError, // CommandStringEscape + C0 [row starts at index 0x50]
+ State::ControlStringError, // CommandStringEscape + C0S
+ State::ControlStringError, // CommandStringEscape + ESC
+ State::ControlStringError, // CommandStringEscape + INT
+ State::ControlStringError, // CommandStringEscape + PAR
+ State::ControlStringError, // CommandStringEscape + SEP
+ State::ControlStringError, // CommandStringEscape + PRI
+ State::ControlStringError, // CommandStringEscape + C1
+ State::ControlStringError, // CommandStringEscape + CSO
+ State::ControlStringError, // CommandStringEscape + SOS
+ State::ControlStringError, // CommandStringEscape + SCI
+ State::ControlStringError, // CommandStringEscape + CSI
+ State::FinishSequence, // CommandStringEscape + ST
+ State::ControlStringError, // CommandStringEscape + ICF
+ State::ControlStringError, // CommandStringEscape + DEL
+ State::ControlStringError, // CommandStringEscape + PAD
+
+ State::PushCharacterString, // CharacterString + C0 [row starts at index 0x60]
+ State::PushCharacterString, // CharacterString + C0S
+ State::CharacterStringEscape, // CharacterString + ESC
+ State::PushCharacterString, // CharacterString + INT
+ State::PushCharacterString, // CharacterString + PAR
+ State::PushCharacterString, // CharacterString + SEP
+ State::PushCharacterString, // CharacterString + PRI
+ State::PushCharacterString, // CharacterString + C1
+ State::PushCharacterString, // CharacterString + CSO
+ State::PushCharacterString, // CharacterString + SOS
+ State::PushCharacterString, // CharacterString + SCI
+ State::PushCharacterString, // CharacterString + CSI
+ State::PushCharacterString, // CharacterString + ST
+ State::PushCharacterString, // CharacterString + ICF
+ State::PushCharacterString, // CharacterString + DEL
+ State::ControlStringError, // CharacterString + PAD
+
+ State::PushCharacterStringEscape, // CharacterStringEscape + C0 [row starts at index 0x70]
+ State::PushCharacterStringEscape, // CharacterStringEscape + C0S
+ State::PushCharacterStringEscape, // CharacterStringEscape + ESC
+ State::PushCharacterStringEscape, // CharacterStringEscape + INT
+ State::PushCharacterStringEscape, // CharacterStringEscape + PAR
+ State::PushCharacterStringEscape, // CharacterStringEscape + SEP
+ State::PushCharacterStringEscape, // CharacterStringEscape + PRI
+ State::PushCharacterStringEscape, // CharacterStringEscape + C1
+ State::PushCharacterStringEscape, // CharacterStringEscape + CSO
+ State::ControlStringError, // CharacterStringEscape + SOS
+ State::PushCharacterStringEscape, // CharacterStringEscape + SCI
+ State::PushCharacterStringEscape, // CharacterStringEscape + CSI
+ State::FinishSequence, // CharacterStringEscape + ST
+ State::PushCharacterStringEscape, // CharacterStringEscape + ICF
+ State::PushCharacterStringEscape, // CharacterStringEscape + DEL
+ State::ControlStringError, // CharacterStringEscape + PAD
+
+ State::ControlStringError, // ControlStringError + C0 [row starts at index 0x80]
+ State::ControlStringError, // ControlStringError + C0S
+ State::ControlStringError, // ControlStringError + ESC
+ State::ControlStringError, // ControlStringError + INT
+ State::ControlStringError, // ControlStringError + PAR
+ State::ControlStringError, // ControlStringError + SEP
+ State::ControlStringError, // ControlStringError + PRI
+ State::ControlStringError, // ControlStringError + C1
+ State::ControlStringError, // ControlStringError + CSO
+ State::ControlStringError, // ControlStringError + SOS
+ State::ControlStringError, // ControlStringError + SCI
+ State::ControlStringError, // ControlStringError + CSI
+ State::Ground, // ControlStringError + ST
+ State::ControlStringError, // ControlStringError + ICF
+ State::ControlStringError, // ControlStringError + DEL
+ State::ControlStringError, // ControlStringError + PAD
+
+ State::Ground, // SingleCharacter + C0 [row starts at index 0x90]
+ State::FinishSequence, // SingleCharacter + C0S
+ State::Ground, // SingleCharacter + ESC
+ State::FinishSequence, // SingleCharacter + INT
+ State::FinishSequence, // SingleCharacter + PAR
+ State::FinishSequence, // SingleCharacter + SEP
+ State::FinishSequence, // SingleCharacter + PRI
+ State::FinishSequence, // SingleCharacter + C1
+ State::FinishSequence, // SingleCharacter + CSO
+ State::FinishSequence, // SingleCharacter + SOS
+ State::FinishSequence, // SingleCharacter + SCI
+ State::FinishSequence, // SingleCharacter + CSI
+ State::FinishSequence, // SingleCharacter + ST
+ State::FinishSequence, // SingleCharacter + ICF
+ State::Ground, // SingleCharacter + DEL
+ State::Ground, // SingleCharacter + PAD
+
+ State::ControlSequenceError, // ControlSequence + C0 [row starts at index 0xA0]
+ State::ControlSequenceError, // ControlSequence + C0S
+ State::ControlSequenceError, // ControlSequence + ESC
+ State::ControlSequencePushIntermediate, // ControlSequence + INT
+ State::ControlSequenceAddParameter, // ControlSequence + PAR
+ State::ControlSequencePushParameter, // ControlSequence + SEP
+ State::PrivateControlSequence, // ControlSequence + PRI
+ State::FinishControlSequence, // ControlSequence + C1
+ State::FinishControlSequence, // ControlSequence + CSO
+ State::FinishControlSequence, // ControlSequence + SOS
+ State::FinishControlSequence, // ControlSequence + SCI
+ State::FinishControlSequence, // ControlSequence + CSI
+ State::FinishControlSequence, // ControlSequence + ST
+ State::FinishControlSequence, // ControlSequence + ICF
+ State::ControlSequenceError, // ControlSequence + DEL
+ State::ControlSequenceError, // ControlSequence + PAD
+
+ State::ControlSequenceError, // ControlSequenceParameter + C0 [row starts at index 0xB0]
+ State::ControlSequenceError, // ControlSequenceParameter + C0S
+ State::ControlSequenceError, // ControlSequenceParameter + ESC
+ State::ControlSequenceParameterIntermediate, // ControlSequenceParameter + INT
+ State::ControlSequenceAddParameter, // ControlSequenceParameter + PAR
+ State::ControlSequencePushParameter, // ControlSequenceParameter + SEP
+ State::ControlSequenceError, // ControlSequenceParameter + PRI
+ State::FinishControlSequence, // ControlSequenceParameter + C1
+ State::FinishControlSequence, // ControlSequenceParameter + CSO
+ State::FinishControlSequence, // ControlSequenceParameter + SOS
+ State::FinishControlSequence, // ControlSequenceParameter + SCI
+ State::FinishControlSequence, // ControlSequenceParameter + CSI
+ State::FinishControlSequence, // ControlSequenceParameter + ST
+ State::FinishControlSequence, // ControlSequenceParameter + ICF
+ State::ControlSequenceError, // ControlSequenceParameter + DEL
+ State::ControlSequenceError, // ControlSequenceParameter + PAD
+
+ State::ControlSequenceError, // ControlSequenceIntermediate + C0 [row starts at index 0xC0]
+ State::ControlSequenceError, // ControlSequenceIntermediate + C0S
+ State::ControlSequenceError, // ControlSequenceIntermediate + ESC
+ State::ControlSequencePushIntermediate, // ControlSequenceIntermediate + INT
+ State::ControlSequenceError, // ControlSequenceIntermediate + PAR
+ State::ControlSequenceError, // ControlSequenceIntermediate + SEP
+ State::ControlSequenceError, // ControlSequenceIntermediate + PRI
+ State::FinishControlSequence, // ControlSequenceIntermediate + C1
+ State::FinishControlSequence, // ControlSequenceIntermediate + CSO
+ State::FinishControlSequence, // ControlSequenceIntermediate + SOS
+ State::FinishControlSequence, // ControlSequenceIntermediate + SCI
+ State::FinishControlSequence, // ControlSequenceIntermediate + CSI
+ State::FinishControlSequence, // ControlSequenceIntermediate + ST
+ State::FinishControlSequence, // ControlSequenceIntermediate + ICF
+ State::ControlSequenceError, // ControlSequenceIntermediate + DEL
+ State::ControlSequenceError, // ControlSequenceIntermediate + PAD
+
+ State::ControlSequenceError, // ControlSequenceError + C0 [row starts at index 0xD0]
+ State::ControlSequenceError, // ControlSequenceError + C0S
+ State::ControlSequenceError, // ControlSequenceError + ESC
+ State::ControlSequenceError, // ControlSequenceError + INT
+ State::ControlSequenceError, // ControlSequenceError + PAR
+ State::ControlSequenceError, // ControlSequenceError + SEP
+ State::ControlSequenceError, // ControlSequenceError + PRI
+ State::Ground, // ControlSequenceError + C1
+ State::Ground, // ControlSequenceError + CSO
+ State::Ground, // ControlSequenceError + SOS
+ State::Ground, // ControlSequenceError + SCI
+ State::Ground, // ControlSequenceError + CSI
+ State::Ground, // ControlSequenceError + ST
+ State::Ground, // ControlSequenceError + ICF
+ State::ControlSequenceError, // ControlSequenceError + DEL
+ State::ControlSequenceError, // ControlSequenceError + PAD
+];
+
+#[repr(u8)] // Variants carry no explicit values, so declaration order fixes the u8 discriminants 0..=14.
+#[derive(Copy, Clone)]
+enum Class { // Classification of a 7-bit byte value; these are the element values of CLASS_TABLE.
+ /// C0 Control Functions
+ ///
+ /// 00..1F (hex), other than 08..0D (C0S) and 1B (ESC), which have their own classes
+ C0, // discriminant 0
+
+ /// C0 Control Functions permitted in Control Strings
+ ///
+ /// 08..0D
+ C0S, // discriminant 1
+
+ /// ESCAPE
+ ///
+ /// 1B
+ ESC, // discriminant 2
+
+ /// Control Function / Control Sequence Intermediate Bytes
+ ///
+ /// 20..2F
+ INT, // discriminant 3
+
+ /// Control Sequence Parameter Bytes
+ ///
+ /// 30..39
+ PAR, // discriminant 4
+
+ /// Control Sequence Parameter Separators
+ ///
+ /// 3A..3B
+ SEP, // discriminant 5
+
+ /// Control Sequence Private Parameter String Indicator
+ ///
+ /// 3C..3F
+ PRI, // discriminant 6
+
+ /// C1 Control Functions
+ ///
+ /// ESC 40..5F, other than 50, 58 and 5A..5F, which have their own classes below
+ C1, // discriminant 7
+
+ /// Command String Opening Delimiter
+ ///
+ /// ESC 50, ESC 5D..5F
+ CSO, // discriminant 8
+
+ /// Start Of String
+ ///
+ /// ESC 58
+ SOS, // discriminant 9
+
+ /// Single Character Introducer
+ ///
+ /// ESC 5A
+ SCI, // discriminant 10
+
+ /// Control Sequence Introducer
+ ///
+ /// ESC 5B
+ CSI, // discriminant 11
+
+ /// String Terminator
+ ///
+ /// ESC 5C
+ ST, // discriminant 12
+
+ /// Independent Control Function Final Bytes
+ ///
+ /// 60..7E
+ ICF, // discriminant 13
+
+ /// DELETE
+ ///
+ /// 7F
+ DEL, // discriminant 14
+}
+
+/// Parser state, which is also the entry type of `STATE_TABLE`.
+///
+/// Every discriminant packs a base state into the upper four bits and an
+/// `Action` into the lower four bits; `State::decompose` splits them again.
+/// Base states are multiples of 0x10, so `state as usize + class` selects one
+/// table entry per (base state, class) pair. The parser only ever stores a
+/// base state in its `state` field.
+#[repr(u8)]
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+enum State {
+ // All base states
+
+ Ground = 0x00,
+ Escape = 0x10,
+ ControlFunction = 0x20,
+ ControlFunctionError = 0x30,
+ CommandString = 0x40,
+ CommandStringEscape = 0x50,
+ CharacterString = 0x60,
+ CharacterStringEscape = 0x70,
+ ControlStringError = 0x80,
+ SingleCharacter = 0x90,
+ ControlSequence = 0xA0,
+ ControlSequenceParameter = 0xB0,
+ ControlSequenceIntermediate = 0xC0,
+ ControlSequenceError = 0xD0,
+
+ // All action states
+ // The upper 4 bits set a base state to return to (see above),
+ // the lower 4 bits set an action to perform (see below)
+ // Base states implicitly have Action::Continue.
+ // Every composite value must be unique, and its low nibble must be a
+ // valid `Action` discriminant, because `decompose` transmutes both halves.
+
+ C0Control =
+ State::Ground as u8 | Action::C01Control as u8,
+
+ Char =
+ State::Ground as u8 | Action::Char as u8,
+
+ StartEscape =
+ State::Escape as u8 | Action::StartSequence as u8,
+
+ PushIntermediateByte =
+ State::ControlFunction as u8 | Action::PushByte as u8,
+
+ // NOTE(review): the base state here is Escape, not Ground, so the parser
+ // stays in Escape after this entry fires. `Ground | C01Control` is already
+ // taken by `C0Control`; confirm how the state table uses this entry.
+ C1Control =
+ State::Escape as u8 | Action::C01Control as u8,
+
+ FinishSequence =
+ State::Ground as u8 | Action::FinishSequence as u8,
+
+ StartCommandString =
+ State::CommandString as u8 | Action::StartSequence as u8,
+
+ StartCharacterString =
+ State::CharacterString as u8 | Action::StartSequence as u8,
+
+ StartSingleCharacter =
+ State::SingleCharacter as u8 | Action::StartSequence as u8,
+
+ StartControlSequence =
+ State::ControlSequence as u8 | Action::StartSequence as u8,
+
+ PushCommandString =
+ State::CommandString as u8 | Action::PushByte as u8,
+
+ PushCharacterString =
+ State::CharacterString as u8 | Action::PushByte as u8,
+
+ PushCharacterStringEscape =
+ State::CharacterString as u8 | Action::PushByteWithEscape as u8,
+
+ PrivateControlSequence =
+ State::ControlSequence as u8 | Action::SetPrivate as u8,
+
+ ControlSequencePushParameter =
+ State::ControlSequence as u8 | Action::PushParam as u8,
+
+ ControlSequenceAddParameter =
+ State::ControlSequenceParameter as u8 | Action::AddParamValue as u8,
+
+ ControlSequenceParameterIntermediate =
+ State::ControlSequenceIntermediate as u8 | Action::PushParamAndByte as u8,
+
+ ControlSequencePushIntermediate =
+ State::ControlSequenceIntermediate as u8 | Action::PushByte as u8,
+
+ FinishControlSequence =
+ State::Ground as u8 | Action::PushParamAndEndSequence as u8,
+}
+
+/// Side effect the parser performs for a state-table entry.
+///
+/// An action is stored in the low four bits of a `State` discriminant and is
+/// recovered by `State::decompose` with a transmute. The variants are numbered
+/// implicitly from 0 in declaration order, so reordering them changes every
+/// composite `State` value, and there is room for at most sixteen actions.
+#[repr(u8)]
+#[derive(Copy, Clone)]
+enum Action {
+ // Variant is never constructed, but is matched on
+ #[allow(dead_code)]
+ /// Return Continue
+ Continue,
+
+ /// Return Char
+ Char,
+
+ /// Set `start`, return Control
+ C01Control,
+
+ /// Set `start`, clear `params` and `bytes`, return Continue
+ StartSequence,
+
+ /// Set `end`, return Control
+ FinishSequence,
+
+ /// Push `byte`, return Continue
+ PushByte,
+
+ /// Push `Escape`, push `byte`, return Continue
+ PushByteWithEscape,
+
+ /// Set `private`, return Continue
+ SetPrivate,
+
+ /// Add to parameter value, return Continue
+ /// (on overflow the parser switches to `State::ControlSequenceError`)
+ AddParamValue,
+
+ /// Push `param` and reset the accumulator, return Continue
+ PushParam,
+
+ /// Push `param`, push `byte`, return Continue
+ PushParamAndByte,
+
+ /// Push `param`, set `end`, return Control
+ PushParamAndEndSequence,
+}
+
+impl State {
+    /// Splits a table entry into the base state to return to and the action to perform.
+    fn decompose(self) -> (State, Action) {
+        let raw = self as u8;
+        let base_bits = raw & 0xF0;
+        let action_bits = raw & 0x0F;
+
+        // SAFETY: every `State` discriminant is a base-state value (high nibble), optionally
+        // OR-ed with an `Action` discriminant (low nibble), so each masked byte is a valid
+        // discriminant of its `#[repr(u8)]` enum.
+        let base: State = unsafe { std::mem::transmute(base_bits) };
+        // SAFETY: see above.
+        let action: Action = unsafe { std::mem::transmute(action_bits) };
+
+        (base, action)
+    }
+
+    /// Poisons the state by replacing it with the entry in the sixteenth (PAD) column
+    /// of its own state-table row.
+    fn poison(&mut self) {
+        let pad_slot = *self as usize + 0xF;
+        *self = STATE_TABLE[pad_slot];
+    }
+}
+
+/// A freshly created parser starts in the ground state.
+impl Default for State {
+    fn default() -> Self {
+        Self::Ground
+    }
+}
+
+
+/// A control sequence parameter.
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+pub enum Parameter {
+    /// No value was given; the default appropriate for the control sequence should be used.
+    Default,
+    /// The parameter has an explicit value.
+    Value(u16),
+}
+
+impl Parameter {
+    /// Creates a parameter holding the explicit value `v`.
+    pub fn new(v: u16) -> Self {
+        Self::Value(v)
+    }
+
+    /// Returns the value of the parameter, if present, otherwise returns the given default.
+    pub fn value_or(&self, or: u16) -> u16 {
+        match self {
+            Self::Default => or,
+            Self::Value(v) => *v,
+        }
+    }
+
+    /// Appends one decimal digit `x` to the parameter, as in an [`atoi`]-like loop.
+    ///
+    /// A `Default` parameter becomes `Value(x)`; an existing value `v` becomes `v * 10 + x`.
+    ///
+    /// Returns `true` if the new value does not fit in a `u16`. In that case the stored value
+    /// has wrapped around and the caller is expected to reject the sequence.
+    ///
+    /// [`atoi`]: https://en.cppreference.com/w/c/string/byte/atoi
+    pub fn add(&mut self, x: u16) -> bool {
+        match self {
+            Self::Default => {
+                *self = Self::Value(x);
+                false
+            }
+
+            Self::Value(v) => {
+                // Shift the digits collected so far one decimal place to the left
+                // before adding the new digit; either step may overflow.
+                let (shifted, mul_oflw) = v.overflowing_mul(10);
+                let (sum, add_oflw) = shifted.overflowing_add(x);
+                *v = sum;
+                mul_oflw || add_oflw
+            }
+        }
+    }
+}
+
+impl Default for Parameter {
+    fn default() -> Self {
+        Self::Default
+    }
+}
+
+/// Data the parser collects for one control function.
+///
+/// The parser owns a single instance, overwrites it as bytes arrive, and
+/// hands out a shared reference to it inside `TerminalInput::Control`.
+///
+/// NOTE(review): all fields are private and no accessors are visible in this
+/// part of the file; confirm how callers are meant to read the contents.
+#[derive(Clone, Eq, PartialEq, Debug, Default)]
+pub struct ControlFunction {
+ /// The start of the control function.
+ ///
+ /// For C0 and C1 controls, which are only 1 byte,
+ /// this is the only necessary field.
+ start: u8,
+ /// Whether this control sequence has a private parameter string.
+ private: bool,
+ /// The parameters of the control sequence, if it is one.
+ params: Vec<Parameter>,
+ /// If this function is a control string,
+ /// this is the string's content.
+ ///
+ /// Otherwise, it's the intermediate bytes of the function.
+ /// For control sequences with private parameters, this contains the raw parameter string.
+ bytes: Vec<u8>,
+ /// The final byte of the control function.
+ ///
+ /// For C0 and C1 controls, as well as control strings,
+ /// this field is left unset.
+ end: u8,
+}
+
+/// What `TerminalInputParser::parse_byte` produced for one input byte.
+#[derive(Clone, Eq, PartialEq, Debug)]
+pub enum TerminalInput<'a> {
+ /// The byte was consumed but nothing is ready to be reported yet.
+ Continue,
+ /// A single character.
+ Char(char),
+ /// If the UTF-8 decoder had to synchronize, two characters have to be inserted,
+ /// in the given order. The parser puts U+FFFD in the first slot.
+ SyncChar(char, char),
+ // FIXME: Passing this by reference saves on allocations,
+ // but currently requires that it is fully processed before parsing can continue.
+ // For performance, it may be better to pass a clone by value, and use a queue to avoid
+ // the input buffer getting clogged. Relevant for stuff that may take longer to evaluate,
+ // like SIXEL strings.
+ // Will require benchmarking though.
+ /// A control function, borrowed from the parser's internal buffer.
+ Control(&'a ControlFunction),
+
+ /// A character to insert (the parser uses U+FFFD), followed by a control function.
+ SyncControl(char, &'a ControlFunction)
+}
+
+/// Byte-at-a-time parser for terminal input.
+///
+/// Bytes are fed through `parse_byte`; bytes below 0x80 drive a table-based
+/// state machine, bytes from 0x80 upwards go to the UTF-8 decoder while the
+/// parser is in the ground state.
+#[derive(Clone, Debug, Default)]
+pub struct TerminalInputParser {
+ /// The current parsing state.
+ state: State,
+ /// Container for parsed control function data.
+ ctl: ControlFunction,
+ /// Accumulator for current control sequence parameter.
+ pacc: Parameter,
+ /// UTF-8 character decoder.
+ utf8: utf8::UTF8Decoder
+}
+
+impl TerminalInputParser {
+    /// Creates a parser in the ground state with empty buffers.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Feeds a single input byte to the parser and reports what it produced.
+    ///
+    /// Bytes `0x80..=0xFF` are decoded as UTF-8 while the parser is in the ground state;
+    /// inside a control function they poison the function and yield `Continue`.
+    /// Bytes below `0x80` drive the table-based state machine.
+    ///
+    /// Whenever an incomplete UTF-8 sequence is cut short, whether by another high byte or
+    /// by a 7-bit byte, exactly one U+FFFD is reported for it. If the interrupting byte
+    /// produces output of its own, both are returned together as `SyncChar` or `SyncControl`.
+    ///
+    /// A returned `Control` / `SyncControl` borrows the parser's internal buffer and therefore
+    /// has to be processed before the next call.
+    pub fn parse_byte(&mut self, byte: u8) -> TerminalInput {
+        if byte >= 0x80 {
+            if self.state != State::Ground {
+                self.state.poison();
+                return TerminalInput::Continue;
+            }
+
+            match self.utf8.decode_byte(byte) {
+                utf8::DecodeState::Continue => TerminalInput::Continue,
+                utf8::DecodeState::Done(c) => TerminalInput::Char(c),
+                utf8::DecodeState::Error => TerminalInput::Char('\u{FFFD}'),
+                utf8::DecodeState::Rewind => {
+                    // The decoder reset itself, so the offending byte is fed in once more.
+                    // The U+FFFD stands for the sequence that was cut short.
+                    match self.utf8.decode_byte(byte) {
+                        utf8::DecodeState::Continue => TerminalInput::Char('\u{FFFD}'),
+                        utf8::DecodeState::Done(c) => TerminalInput::SyncChar('\u{FFFD}', c),
+                        // A freshly reset decoder does not ask for another rewind; handling
+                        // that case like `Error` keeps this path free of `unsafe`.
+                        utf8::DecodeState::Error | utf8::DecodeState::Rewind => {
+                            TerminalInput::SyncChar('\u{FFFD}', '\u{FFFD}')
+                        }
+                    }
+                }
+            }
+        } else {
+            // A 7-bit byte arriving while the decoder is in the middle of a multi-byte
+            // sequence truncates that sequence. The decoder is only ever mid-sequence in the
+            // ground state, and it answers `Rewind` (after resetting itself) exactly in that
+            // situation; with nothing pending it just decodes the byte and stays idle.
+            let resync = self.state == State::Ground
+                && self.utf8.decode_byte(byte) == utf8::DecodeState::Rewind;
+
+            let input = self.step(byte);
+
+            if !resync {
+                return input;
+            }
+
+            // Report U+FFFD for the truncated sequence ahead of this byte's own result.
+            match input {
+                TerminalInput::Continue => TerminalInput::Char('\u{FFFD}'),
+                TerminalInput::Char(c) => TerminalInput::SyncChar('\u{FFFD}', c),
+                TerminalInput::Control(ctl) => TerminalInput::SyncControl('\u{FFFD}', ctl),
+                other => other,
+            }
+        }
+    }
+
+    /// Runs one 7-bit byte through the class and state tables and performs the action of
+    /// the selected table entry.
+    fn step(&mut self, byte: u8) -> TerminalInput<'_> {
+        let class = CLASS_TABLE[byte as usize] as usize;
+
+        // `self.state` always holds a base state (a multiple of 0x10), so adding the class
+        // selects the entry for this (state, class) pair. Checked indexing is used here,
+        // just like in `State::poison`.
+        let (base, action) = STATE_TABLE[self.state as usize + class].decompose();
+
+        self.state = base;
+
+        match action {
+            Action::Continue => TerminalInput::Continue,
+            Action::Char => TerminalInput::Char(byte as char),
+            Action::C01Control => {
+                self.ctl.start = byte;
+                TerminalInput::Control(&self.ctl)
+            }
+
+            Action::StartSequence => {
+                // Drop everything left over from the previous control function, including
+                // the private flag and a parameter abandoned by a rejected sequence.
+                self.ctl.start = byte;
+                self.ctl.private = false;
+                self.ctl.end = 0;
+                self.ctl.params.clear();
+                self.ctl.bytes.clear();
+                self.pacc = Parameter::Default;
+                TerminalInput::Continue
+            }
+
+            Action::FinishSequence => {
+                self.ctl.end = byte;
+                TerminalInput::Control(&self.ctl)
+            }
+
+            Action::PushByte => {
+                self.ctl.bytes.push(byte);
+                TerminalInput::Continue
+            }
+
+            Action::PushByteWithEscape => {
+                self.ctl.bytes.push(0x1B);
+                self.ctl.bytes.push(byte);
+                TerminalInput::Continue
+            }
+
+            Action::SetPrivate => {
+                self.ctl.private = true;
+                TerminalInput::Continue
+            }
+
+            Action::AddParamValue => {
+                // The byte is an ASCII digit here; a parameter that overflows rejects the
+                // whole control sequence.
+                if self.pacc.add(u16::from(byte) - 0x30) {
+                    self.state = State::ControlSequenceError;
+                }
+
+                TerminalInput::Continue
+            }
+
+            Action::PushParam => {
+                self.ctl.params.push(self.pacc);
+                self.pacc = Parameter::Default;
+
+                TerminalInput::Continue
+            }
+
+            Action::PushParamAndByte => {
+                self.ctl.bytes.push(byte);
+                self.ctl.params.push(self.pacc);
+                self.pacc = Parameter::Default;
+
+                TerminalInput::Continue
+            }
+
+            Action::PushParamAndEndSequence => {
+                self.ctl.params.push(self.pacc);
+                self.pacc = Parameter::Default;
+                self.ctl.end = byte;
+                TerminalInput::Control(&self.ctl)
+            }
+        }
+    }
+}
diff --git a/src/lib_old.rs b/src/lib_old.rs
new file mode 100644
index 0000000..90bf7f4
--- /dev/null
+++ b/src/lib_old.rs
@@ -0,0 +1,424 @@
+pub mod recognizer;
+pub mod v2;
+mod utf8;
+
+/// A control sequence parameter.
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+pub enum Parameter {
+    /// No value was given; the default appropriate for the control sequence should be used.
+    Default,
+    /// The parameter has an explicit value.
+    Value(u16),
+}
+
+impl Parameter {
+    /// Creates a parameter holding the explicit value `v`.
+    pub fn new(v: u16) -> Self {
+        Self::Value(v)
+    }
+
+    /// Returns the value of the parameter, if present, otherwise returns the given default.
+    pub fn value_or(&self, or: u16) -> u16 {
+        match self {
+            Self::Default => or,
+            Self::Value(v) => *v,
+        }
+    }
+
+    /// Appends one decimal digit `x` to the parameter, as in an [`atoi`]-like loop.
+    ///
+    /// A `Default` parameter becomes `Value(x)`; an existing value `v` becomes `v * 10 + x`.
+    ///
+    /// Returns `true` if the new value does not fit in a `u16`. In that case the stored value
+    /// has wrapped around and the caller is expected to reject the sequence.
+    ///
+    /// [`atoi`]: https://en.cppreference.com/w/c/string/byte/atoi
+    pub fn add(&mut self, x: u16) -> bool {
+        match self {
+            Self::Default => {
+                *self = Self::Value(x);
+                false
+            }
+
+            Self::Value(v) => {
+                // Shift the digits collected so far one decimal place to the left
+                // before adding the new digit; either step may overflow.
+                let (shifted, mul_oflw) = v.overflowing_mul(10);
+                let (sum, add_oflw) = shifted.overflowing_add(x);
+                *v = sum;
+                mul_oflw || add_oflw
+            }
+        }
+    }
+}
+
+impl Default for Parameter {
+    fn default() -> Self {
+        Self::Default
+    }
+}
+
+/// Data collected for one control function.
+///
+/// A reference to this type is carried by `TerminalInput::Control`.
+///
+/// NOTE(review): all fields are private and no accessors are visible in this
+/// file; confirm how callers are meant to read the contents.
+#[derive(Clone, Eq, PartialEq, Debug, Default)]
+pub struct ControlFunction {
+ /// The start of the control function.
+ ///
+ /// For C0 and C1 controls, which are only 1 byte,
+ /// this is the only necessary field.
+ start: u8,
+ /// Whether this control sequence has a private parameter string.
+ private: bool,
+ /// The parameters of the control sequence, if it is one.
+ params: Vec<Parameter>,
+ /// If this function is a control string,
+ /// this is the string's content.
+ ///
+ /// Otherwise, it's the intermediate bytes of the function.
+ /// For control sequences with private parameters, this contains the raw parameter string.
+ bytes: Vec<u8>,
+ /// The final byte of the control function.
+ ///
+ /// For C0 and C1 controls, as well as control strings,
+ /// this field is left unset.
+ end: u8,
+}
+
+/// Return type of `TerminalInputParser::parse_byte`.
+#[derive(Clone, Eq, PartialEq, Debug)]
+pub enum TerminalInput<'a> {
+ /// Presumably: the byte was consumed and nothing is ready to report yet
+ /// (the parser in this file is still unimplemented, so this is unconfirmed).
+ Continue,
+ /// A single character.
+ Char(char),
+ // FIXME: Passing this by reference saves on allocations,
+ // but currently requires that it is fully processed before parsing can continue.
+ // For performance, it may be better to pass a clone by value, and use a queue to avoid
+ // the input buffer getting clogged. Relevant for stuff that may take longer to evaluate,
+ // like SIXEL strings.
+ // Will require benchmarking though.
+ /// A control function, passed by reference.
+ Control(&'a ControlFunction),
+}
+
+/// Byte-at-a-time parser for terminal input.
+///
+/// In this file the parser is only a skeleton: `parse_byte` is unimplemented
+/// and the UTF-8 decoder field is commented out.
+#[derive(Clone, Debug, Default)]
+pub struct TerminalInputParser {
+ /// The current parsing state.
+ state: State,
+ /// Container for parsed control function data.
+ ctl: ControlFunction,
+ /// Accumulator for current control sequence parameter.
+ pacc: Parameter,
+ // /// UTF-8 character decoder.
+ // utf8: UTF8Decoder
+}
+
+impl TerminalInputParser {
+    /// Creates a parser with every field set to its default value.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Feeds a single input byte to the parser.
+    ///
+    /// # Panics
+    ///
+    /// Always panics: parsing is not implemented in this version of the file.
+    pub fn parse_byte(&mut self, _byte: u8) -> TerminalInput {
+        // The underscore keeps the unused-variable lint quiet until there is a body.
+        unimplemented!()
+    }
+}
+
+
+
+/// Character class of a 7-bit input byte, as stored in `CLASS_TABLE`.
+///
+/// Ranges in the variant docs are hexadecimal byte values. In `CLASS_TABLE`
+/// the more specific classes take precedence inside a broader range: 08..0D
+/// are `C0S` and 1B is `ESC` rather than `C0`, and 50, 58 and 5A..5F are
+/// `CSO`/`SOS`/`SCI`/`CSI`/`ST` rather than `C1`.
+/// A range prefixed with `ESC` presumably describes the byte's meaning when
+/// it follows ESCAPE (TODO confirm).
+#[repr(u8)]
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+enum Class {
+ /// C0 Control Functions
+ ///
+ /// 00..1F
+ C0,
+
+ /// C0 Control Functions permitted in Control Strings
+ ///
+ /// 08..0D
+ C0S,
+
+ /// ESCAPE
+ ///
+ /// 1B
+ ESC,
+
+ /// Control Function / Control Sequence Intermediate Bytes
+ ///
+ /// 20..2F
+ INT,
+
+ /// Control Sequence Parameter Bytes
+ ///
+ /// 30..39
+ PAR,
+
+ /// Control Sequence Parameter Separators
+ ///
+ /// 3A..3B
+ SEP,
+
+ /// Control Sequence Private Parameter String Indicator
+ ///
+ /// 3C..3F
+ PRI,
+
+ /// C1 Control Functions
+ ///
+ /// ESC 40..5F
+ C1,
+
+ /// Command String Opening Delimiter
+ ///
+ /// ESC 50, ESC 5D..5F
+ CSO,
+
+ /// Start Of String
+ ///
+ /// ESC 58
+ SOS,
+
+ /// Single Character Introducer
+ ///
+ /// ESC 5A
+ SCI,
+
+ /// Control Sequence Introducer
+ ///
+ /// ESC 5B
+ CSI,
+
+ /// String Terminator
+ ///
+ /// ESC 5C
+ ST,
+
+ /// Independent Control Function Final Bytes
+ ///
+ /// 60..7E
+ ICF,
+
+ /// DELETE
+ ///
+ /// 7F
+ DEL,
+}
+
+use Class::*;
+
+/// Byte to Class translation table
+///
+/// Indexed by the byte value; it has 128 entries, so only bytes 0x00..=0x7F
+/// may be looked up. Each source row below covers sixteen consecutive byte
+/// values, starting at 0x00, 0x10, ... 0x70.
+const CLASS_TABLE: [Class; 128] = [
+ C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,C0S,C0S,C0S,C0S,C0S,C0S,C0 ,C0 ,
+ C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,ESC,C0 ,C0 ,C0 ,C0 ,
+ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
+ PAR,PAR,PAR,PAR,PAR,PAR,PAR,PAR,PAR,PAR,SEP,SEP,PRI,PRI,PRI,PRI,
+ C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,
+ CSO,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,SOS,C1 ,SCI,CSI,ST ,CSO,CSO,CSO,
+ ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,
+ ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,DEL,
+];
+
+/// State + Class to State transition table
+///
+/// NOTE(review): every entry is `State::OK` and the length is 185, while the
+/// base states below are spaced 0x10 apart up to 0xD0. This is presumably a
+/// placeholder until the real transitions are filled in; confirm before use.
+const STATE_TABLE: [State; 185] = [State::OK;185];
+
+/// Parser action encoded in the low four bits of a `State` discriminant.
+///
+/// `State::decompose` transmutes the low nibble back into this enum, so the
+/// implicit numbering (0 upwards in declaration order) is part of the
+/// encoding. The intended effect of each action is presumably the one listed
+/// in the "11 unique actions" comment next to `State` (TODO confirm; nothing
+/// in this file executes the actions yet).
+#[repr(u8)]
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+enum Action {
+ Continue,
+ ReturnChar,
+ C01Control,
+ StartSequence,
+ FinishSequence,
+ PushByte,
+ SetPrivate,
+ PushLastParam,
+ PushParamAndByte,
+ PushParam,
+ AddParamValue,
+}
+
+#[allow(non_camel_case_types)]
+#[repr(u8)]
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+// Abbreviations used in the transition lists below (hexadecimal byte ranges):
+// NCB: 20..7E
+// CFF: 30..7E
+// CSC: 08..0D,20..7E
+// NOTE(review): the lists also use CSF, which is not defined here;
+// presumably the control sequence final bytes. TODO confirm.
+
+// 11 unique actions
+// return char
+// return continue
+// set start, return control
+// set start, return continue
+// set end, return control
+// push byte, return continue
+// set private, return continue
+// push param, set end, return control
+// push param, push byte, return continue
+// push param, return continue
+// add value, return continue
+
+// Design notes on the encoding:
+// always return to some base state, which is a multiple of 15
+// for example, we will never match against state 61
+// use modulo 15 arithmetic to encode 61 as "action 1, state 60"
+// modulo 15 not ideal
+// pad rows to 16 so we can use modulo 16 (easy bitmask)
+// (this is the scheme the discriminants below use: base states are multiples
+// of 0x10 and the action sits in the low four bits)
+
+
+
+enum State {
+ /// Base state
+ ///
+ /// ```text,ignore
+ /// C0 -> OK_C01 (set start, return control)
+ /// ESC -> ESC (set start, return continue)
+ /// NCB -> OK_NCB ( return char)
+ /// DEL -> OK ( return continue)
+ /// ```
+ OK = 0x00,
+
+ /// Received ESCAPE
+ ///
+ /// ```text,ignore
+ /// C0 -> OK_C01 (set start, return control)
+ /// INT -> CF (push byte, return continue)
+ /// CFF -> OK_CF (set end, return control)
+ /// C1 -> OK_C01 (set start, return control)
+ /// CSO -> CMS (set start, return continue)
+ /// SOS -> SOS (set start, return continue)
+ /// SCI -> SCI (set start, return continue)
+ /// CSI -> CSI (set start, return continue)
+ /// _ -> OK ( return continue)
+ /// ```
+ ESC = 0x10,
+
+ /// Control Function
+ ///
+ /// ```text,ignore
+ /// INT -> CF (push byte, return continue)
+ /// CFF -> OK_CF (set end, return control)
+ /// _ -> ERR_CF ( return continue)
+ /// ```
+ CF = 0x20,
+
+ /// Poisoned Control Function
+ ///
+ /// ```text,ignore
+ /// CFF -> OK (return continue)
+ /// _ -> ERR_CF (return continue)
+ /// ```
+ ERR_CF = 0x30,
+
+ /// Command String
+ ///
+ /// ```text,ignore
+ /// ESC -> CMS_ESC ( return continue)
+ /// CSC -> CMS_ACC (push byte, return continue)
+ /// _ -> ERR_CMS ( return continue)
+ /// ```
+ CMS = 0x40,
+
+ /// Command String, Received ESCAPE
+ ///
+ /// ```text,ignore
+ /// ST -> OK_CF (set end, return control)
+ /// _ -> ERR_CMS ( return continue)
+ /// ```
+ CMS_ESC = 0x50,
+
+ /// Poisoned Command String
+ ///
+ /// ```text,ignore
+ /// ESC -> CMS_ESC (return continue)
+ /// _ -> ERR_CMS (return continue)
+ /// ```
+ ERR_CMS = 0x60,
+
+ /// Start Of String
+ ///
+ /// ```text,ignore
+ /// ESC -> SOS_ESC ( return continue)
+ /// _ -> SOS_ACC (push byte, return continue)
+ /// ```
+ SOS = 0x70,
+
+ /// Start Of String, Received ESCAPE
+ ///
+ /// ```text,ignore
+ /// ST -> OK_CF (set end, return control)
+ /// SOS -> ERR_CMS ( return continue)
+ /// _ -> SOS_ACC (push byte, return continue)
+ /// ```
+ SOS_ESC = 0x80,
+
+ /// Single Character Introducer
+ ///
+ /// ```text,ignore
+ /// CSC -> OK_CF (set end, return control)
+ /// _ -> OK ( return continue)
+ /// ```
+ SCI = 0x90,
+
+ /// Control Sequence Introducer
+ ///
+ /// ```text,ignore
+ /// PRI -> CSI_PRI (set private, return continue)
+ /// PAR -> CSI_PAR (add value, return continue)
+ /// SEP -> CSI_SEP (push param, return continue)
+ /// INT -> CSI_INT (push byte, return continue)
+ /// CSF -> OK_CF (set end, return control)
+ /// _ -> ERR_CSI ( return continue)
+ /// ```
+ CSI = 0xA0,
+
+ /// Control Sequence Introducer, Received Parameter Byte
+ ///
+ /// ```text,ignore
+ /// PAR -> CSI_PAR (add value, return continue)
+ /// SEP -> CSI_SEP (push param, return continue)
+ /// INT -> CSI_PIN (push param, push byte, return continue)
+ /// CSF -> OK_CSI (push param, set end, return control)
+ /// _ -> ERR_CSI ( return continue)
+ /// ```
+ CSI_PAR = 0xB0,
+
+ /// Control Sequence Introducer, Received Intermediate Byte
+ ///
+ /// ```text,ignore
+ /// INT -> CSI_INT (push byte, return continue)
+ /// CSF -> OK_CF (set end, return control)
+ /// _ -> ERR_CSI ( return continue)
+ /// ```
+ CSI_INT = 0xC0,
+
+ /// Poisoned Control Sequence Introducer
+ ///
+ /// ```text,ignore
+ /// CSF -> OK (return continue)
+ /// _ -> ERR_CSI (return continue)
+ /// ```
+ ERR_CSI = 0xD0,
+
+ // All states with an action.
+ // Base states are implicitly Action::Continue.
+ // NOTE(review): the transition lists above send PAR to CSI_PAR and INT to
+ // CSI_INT / CSI_PIN with an action, and mention OK_CSI, but the composite
+ // variants below all return to the CSI base state and there is no OK_CSI
+ // variant. Confirm which of the two is intended.
+ OK_C01 = State::OK as u8 | Action::C01Control as u8,
+ OK_NCB = State::OK as u8 | Action::ReturnChar as u8,
+ TR_ESC = State::ESC as u8 | Action::StartSequence as u8,
+ TR_CF = State::CF as u8 | Action::PushByte as u8,
+ OK_CF = State::OK as u8 | Action::FinishSequence as u8,
+ TR_CMS = State::CMS as u8 | Action::StartSequence as u8,
+ TR_SOS = State::SOS as u8 | Action::StartSequence as u8,
+ TR_SCI = State::SCI as u8 | Action::StartSequence as u8,
+ TR_CSI = State::CSI as u8 | Action::StartSequence as u8,
+ CMS_ACC = State::CMS as u8 | Action::PushByte as u8,
+ SOS_ACC = State::SOS as u8 | Action::PushByte as u8,
+ CSI_PRI = State::CSI as u8 | Action::SetPrivate as u8,
+ CSI_SEP = State::CSI as u8 | Action::PushParam as u8,
+ CSI_PAC = State::CSI as u8 | Action::AddParamValue as u8,
+ CSI_IAC = State::CSI as u8 | Action::PushByte as u8,
+ CSI_PIN = State::CSI as u8 | Action::PushParamAndByte as u8,
+}
+
+impl State {
+    /// Splits a state into the base state to return to and the parser action to perform.
+    fn decompose(self) -> (State, Action) {
+        let raw = self as u8;
+        let base_bits = raw & 0xF0;
+        let action_bits = raw & 0x0F;
+
+        // SAFETY: every `State` discriminant is a base-state value (high nibble), optionally
+        // OR-ed with an `Action` discriminant (low nibble), so each masked byte is a valid
+        // discriminant of its `#[repr(u8)]` enum.
+        let base: State = unsafe { std::mem::transmute(base_bits) };
+        // SAFETY: see above.
+        let action: Action = unsafe { std::mem::transmute(action_bits) };
+
+        (base, action)
+    }
+}
+
+/// The default state is the base state `OK`.
+impl Default for State {
+    fn default() -> Self {
+        Self::OK
+    }
+}
diff --git a/src/recognizer.rs b/src/recognizer.rs
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/recognizer.rs
diff --git a/src/utf8.rs b/src/utf8.rs
new file mode 100644
index 0000000..0a3ffc8
--- /dev/null
+++ b/src/utf8.rs
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2008-2009 Bjoern Hoehrmann <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this software
+ * and associated documentation files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or
+ * substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// See https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for more information on how this works.
+
+/// Decoder ground state.
+///
+/// Also the offset of the first transition row in `UTF8_TABLE`.
+const OK: u8 = 0;
+/// Decoder error state.
+///
+/// A marker value only: the transition rows end at offset 95, and the decoder
+/// resets itself to `OK` as soon as it reaches this value.
+const ER: u8 = 96;
+/// Decoder error state. Offending byte should be passed in again ("rewind")
+///
+/// A marker value only, handled like `ER`: the decoder resets itself to `OK`
+/// as soon as it reaches this value.
+const RW: u8 = 108;
+
+/// Combined lookup table of the UTF-8 decoder.
+///
+/// Entries 0..256 map an input byte to its character class (0..=11).
+/// Entries 256..352 are eight transition rows of twelve columns each; the
+/// decoder state is the row's offset (0, 12, ... 84) and the character class
+/// is the column, so the next state is at `256 + state + class`.
+const UTF8_TABLE: [u8; 256+96] = [
+ // Maps bytes to character classes
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 0x90
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 0xA0
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xC0
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, // 0xE0
+ 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 0xF0
+
+ // Maps state (row) + character class (column) to next state
+ OK, ER, 12, 24, 48, 84, 72, ER, ER, ER, 36, 60, // 0 - OK
+ RW, OK, RW, RW, RW, RW, RW, OK, RW, OK, RW, RW, // 12 - 1 byte needed
+ RW, 12, RW, RW, RW, RW, RW, 12, RW, 12, RW, RW, // 24 - 2 bytes needed
+ RW, RW, RW, RW, RW, RW, RW, 12, RW, RW, RW, RW, // 36 - 2 bytes needed, E0 lead
+ RW, 12, RW, RW, RW, RW, RW, RW, RW, 12, RW, RW, // 48 - 2 bytes needed, ED lead
+ RW, RW, RW, RW, RW, RW, RW, 24, RW, 24, RW, RW, // 60 - 3 bytes needed, F0 lead
+ RW, 24, RW, RW, RW, RW, RW, 24, RW, 24, RW, RW, // 72 - 3 bytes needed
+ RW, 24, RW, RW, RW, RW, RW, RW, RW, RW, RW, RW, // 84 - 3 bytes needed, F4 lead
+];
+
+/// Incremental UTF-8 decoder that is fed one byte at a time.
+///
+/// The default value is the ground state with an empty code point.
+#[derive(Copy, Clone, Debug, Default)]
+pub struct UTF8Decoder {
+ // Code point bits accumulated so far for the sequence in progress.
+ code_point: u32,
+ // Current DFA state: a row offset into the transition part of `UTF8_TABLE`.
+ state: u8
+}
+
+/// Outcome of feeding one byte to `UTF8Decoder::decode_byte`.
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+pub enum DecodeState {
+ /// The byte completed a character.
+ Done(char),
+ /// The byte was accepted; more bytes are needed to complete the character.
+ Continue,
+ /// The byte cannot start a character. The decoder has been reset.
+ Error,
+ /// The byte is not a valid continuation of the sequence in progress.
+ /// The decoder has been reset and the same byte should be passed in again.
+ Rewind
+}
+
+impl UTF8Decoder {
+    /// Puts the decoder back into the ground state.
+    ///
+    /// The accumulated code point is left alone; it is overwritten by the next byte.
+    #[inline]
+    pub fn reset(&mut self) {
+        self.state = OK;
+    }
+
+    /// Feeds one byte to the decoder and reports the outcome.
+    ///
+    /// After `Error` or `Rewind` the decoder is back in the ground state; for `Rewind` the
+    /// caller is expected to pass the same byte in again.
+    pub fn decode_byte(&mut self, byte: u8) -> DecodeState {
+        let class = UTF8_TABLE[byte as usize];
+        let bits = byte as u32;
+
+        self.code_point = match self.state {
+            // The character class values for leading bytes double as a shift count that
+            // masks off the length marker bits: a no-op for class 0, and 0 for classes > 7
+            // (continuations & invalid bytes).
+            OK => (0xFF_u32 >> class) & bits,
+            // Standard continuation byte extraction. Gibberish from invalid input is fine:
+            // errors reset the state to OK, and the next byte then overwrites the code point.
+            _ => (self.code_point << 6) | (bits & 0x3F),
+        };
+
+        let slot = 256 + self.state as usize + class as usize;
+        // SAFETY: `state` is always one of the row offsets 0, 12, ... 84 (ER and RW are reset
+        // to OK before returning) and `class` is at most 11, so `slot` is at most 351.
+        self.state = unsafe { *UTF8_TABLE.get_unchecked(slot) };
+
+        match self.state {
+            OK => {
+                // SAFETY: the transition table rejects surrogates and out-of-range code
+                // points, so reaching OK means `code_point` is a valid scalar value.
+                DecodeState::Done(unsafe { std::char::from_u32_unchecked(self.code_point) })
+            }
+            ER => {
+                self.reset();
+                DecodeState::Error
+            }
+            RW => {
+                self.reset();
+                DecodeState::Rewind
+            }
+            _ => DecodeState::Continue,
+        }
+    }
+}