Initial Checkin.
| -rw-r--r-- | .gitignore | 3 | ||||
| -rw-r--r-- | Cargo.toml | 10 | ||||
| -rw-r--r-- | LICENSE | 373 | ||||
| -rw-r--r-- | src/lib.rs | 715 | ||||
| -rw-r--r-- | src/lib_old.rs | 424 | ||||
| -rw-r--r-- | src/recognizer.rs | 0 | ||||
| -rw-r--r-- | src/utf8.rs | 108 |
7 files changed, 1633 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6936990 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +**/*.rs.bk +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..a839dd5 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "ctlfun" +version = "0.1.0" +authors = ["Luna Catkins <[email protected]>"] +edition = "2018" +license = "MPL-2.0" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. 
"Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. 
Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..4b9585d --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,715 @@ +pub mod recognizer; + +mod utf8; + +const CLASS_TABLE: [Class; 0x80] = [ + Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 , + Class::C0S,Class::C0S,Class::C0S,Class::C0S,Class::C0S,Class::C0S,Class::C0 ,Class::C0 , + + Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 , + Class::C0 ,Class::C0 ,Class::C0 ,Class::ESC,Class::C0 ,Class::C0 ,Class::C0 ,Class::C0 , + + Class::INT,Class::INT,Class::INT,Class::INT,Class::INT,Class::INT,Class::INT,Class::INT, + Class::INT,Class::INT,Class::INT,Class::INT,Class::INT,Class::INT,Class::INT,Class::INT, + + Class::PAR,Class::PAR,Class::PAR,Class::PAR,Class::PAR,Class::PAR,Class::PAR,Class::PAR, + Class::PAR,Class::PAR,Class::SEP,Class::SEP,Class::PRI,Class::PRI,Class::PRI,Class::PRI, + + Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 , + Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 , + + Class::CSO,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 ,Class::C1 , + Class::SOS,Class::C1 ,Class::SCI,Class::CSI,Class::ST ,Class::CSO,Class::CSO,Class::CSO, + + Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF, + Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF, + + Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF, + Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::ICF,Class::DEL, +]; + +const 
STATE_TABLE: [State; 0xE0] = [ + State::C0Control, // Ground + C0 + State::C0Control, // Ground + C0S + State::StartEscape, // Ground + ESC + State::Char, // Ground + INT + State::Char, // Ground + PAR + State::Char, // Ground + SEP + State::Char, // Ground + PRI + State::Char, // Ground + C1 + State::Char, // Ground + CSO + State::Char, // Ground + SOS + State::Char, // Ground + SCI + State::Char, // Ground + CSI + State::Char, // Ground + ST + State::Char, // Ground + ICF + State::Ground, // Ground + DEL + State::Ground, // Ground + PAD + + State::C0Control, // Escape + C0 + State::C0Control, // Escape + C0S + State::StartEscape, // Escape + ESC + State::PushIntermediateByte, // Escape + INT + State::FinishSequence, // Escape + PAR + State::FinishSequence, // Escape + SEP + State::FinishSequence, // Escape + PRI + State::C1Control, // Escape + C1 + State::StartCommandString, // Escape + CSO + State::StartCharacterString, // Escape + SOS + State::StartSingleCharacter, // Escape + SCI + State::StartControlSequence, // Escape + CSI + State::C1Control, // Escape + ST + State::FinishSequence, // Escape + ICF + State::Ground, // Escape + DEL + State::Ground, // Escape + PAD + + State::ControlFunctionError, // ControlFunction + C0 + State::ControlFunctionError, // ControlFunction + C0S + State::ControlFunctionError, // ControlFunction + ESC + State::PushIntermediateByte, // ControlFunction + INT + State::FinishSequence, // ControlFunction + PAR + State::FinishSequence, // ControlFunction + SEP + State::FinishSequence, // ControlFunction + PRI + State::FinishSequence, // ControlFunction + C1 + State::FinishSequence, // ControlFunction + CSO + State::FinishSequence, // ControlFunction + SOS + State::FinishSequence, // ControlFunction + SCI + State::FinishSequence, // ControlFunction + CSI + State::FinishSequence, // ControlFunction + ST + State::FinishSequence, // ControlFunction + ICF + State::Ground, // ControlFunction + DEL + State::ControlFunctionError, // 
ControlFunction + PAD + + State::ControlFunctionError, // ControlFunctionError + C0 + State::ControlFunctionError, // ControlFunctionError + C0S + State::ControlFunctionError, // ControlFunctionError + ESC + State::ControlFunctionError, // ControlFunctionError + INT + State::Ground, // ControlFunctionError + PAR + State::Ground, // ControlFunctionError + SEP + State::Ground, // ControlFunctionError + PRI + State::Ground, // ControlFunctionError + C1 + State::Ground, // ControlFunctionError + CSO + State::Ground, // ControlFunctionError + SOS + State::Ground, // ControlFunctionError + SCI + State::Ground, // ControlFunctionError + CSI + State::Ground, // ControlFunctionError + ST + State::Ground, // ControlFunctionError + ICF + State::ControlFunctionError, // ControlFunctionError + DEL + State::ControlFunctionError, // ControlFunctionError + PAD + + State::ControlStringError, // CommandString + C0 + State::PushCommandString, // CommandString + C0S + State::CommandStringEscape, // CommandString + ESC + State::PushCommandString, // CommandString + INT + State::PushCommandString, // CommandString + PAR + State::PushCommandString, // CommandString + SEP + State::PushCommandString, // CommandString + PRI + State::PushCommandString, // CommandString + C1 + State::PushCommandString, // CommandString + CSO + State::PushCommandString, // CommandString + SOS + State::PushCommandString, // CommandString + SCI + State::PushCommandString, // CommandString + CSI + State::PushCommandString, // CommandString + ST + State::PushCommandString, // CommandString + ICF + State::ControlStringError, // CommandString + DEL + State::ControlStringError, // CommandString + PAD + + State::ControlStringError, // CommandStringEscape + C0 + State::ControlStringError, // CommandStringEscape + C0S + State::ControlStringError, // CommandStringEscape + ESC + State::ControlStringError, // CommandStringEscape + INT + State::ControlStringError, // CommandStringEscape + PAR + State::ControlStringError, // 
CommandStringEscape + SEP + State::ControlStringError, // CommandStringEscape + PRI + State::ControlStringError, // CommandStringEscape + C1 + State::ControlStringError, // CommandStringEscape + CSO + State::ControlStringError, // CommandStringEscape + SOS + State::ControlStringError, // CommandStringEscape + SCI + State::ControlStringError, // CommandStringEscape + CSI + State::FinishSequence, // CommandStringEscape + ST + State::ControlStringError, // CommandStringEscape + ICF + State::ControlStringError, // CommandStringEscape + DEL + State::ControlStringError, // CommandStringEscape + PAD + + State::PushCharacterString, // CharacterString + C0 + State::PushCharacterString, // CharacterString + C0S + State::CharacterStringEscape, // CharacterString + ESC + State::PushCharacterString, // CharacterString + INT + State::PushCharacterString, // CharacterString + PAR + State::PushCharacterString, // CharacterString + SEP + State::PushCharacterString, // CharacterString + PRI + State::PushCharacterString, // CharacterString + C1 + State::PushCharacterString, // CharacterString + CSO + State::PushCharacterString, // CharacterString + SOS + State::PushCharacterString, // CharacterString + SCI + State::PushCharacterString, // CharacterString + CSI + State::PushCharacterString, // CharacterString + ST + State::PushCharacterString, // CharacterString + ICF + State::PushCharacterString, // CharacterString + DEL + State::ControlStringError, // CharacterString + PAD + + State::PushCharacterStringEscape, // CharacterStringEscape + C0 + State::PushCharacterStringEscape, // CharacterStringEscape + C0S + State::PushCharacterStringEscape, // CharacterStringEscape + ESC + State::PushCharacterStringEscape, // CharacterStringEscape + INT + State::PushCharacterStringEscape, // CharacterStringEscape + PAR + State::PushCharacterStringEscape, // CharacterStringEscape + SEP + State::PushCharacterStringEscape, // CharacterStringEscape + PRI + State::PushCharacterStringEscape, // 
CharacterStringEscape + C1 + State::PushCharacterStringEscape, // CharacterStringEscape + CSO + State::ControlStringError, // CharacterStringEscape + SOS + State::PushCharacterStringEscape, // CharacterStringEscape + SCI + State::PushCharacterStringEscape, // CharacterStringEscape + CSI + State::FinishSequence, // CharacterStringEscape + ST + State::PushCharacterStringEscape, // CharacterStringEscape + ICF + State::PushCharacterStringEscape, // CharacterStringEscape + DEL + State::ControlStringError, // CharacterStringEscape + PAD + + State::ControlStringError, // ControlStringError + C0 + State::ControlStringError, // ControlStringError + C0S + State::ControlStringError, // ControlStringError + ESC + State::ControlStringError, // ControlStringError + INT + State::ControlStringError, // ControlStringError + PAR + State::ControlStringError, // ControlStringError + SEP + State::ControlStringError, // ControlStringError + PRI + State::ControlStringError, // ControlStringError + C1 + State::ControlStringError, // ControlStringError + CSO + State::ControlStringError, // ControlStringError + SOS + State::ControlStringError, // ControlStringError + SCI + State::ControlStringError, // ControlStringError + CSI + State::Ground, // ControlStringError + ST + State::ControlStringError, // ControlStringError + ICF + State::ControlStringError, // ControlStringError + DEL + State::ControlStringError, // ControlStringError + PAD + + State::Ground, // SingleCharacter + C0 + State::FinishSequence, // SingleCharacter + C0S + State::Ground, // SingleCharacter + ESC + State::FinishSequence, // SingleCharacter + INT + State::FinishSequence, // SingleCharacter + PAR + State::FinishSequence, // SingleCharacter + SEP + State::FinishSequence, // SingleCharacter + PRI + State::FinishSequence, // SingleCharacter + C1 + State::FinishSequence, // SingleCharacter + CSO + State::FinishSequence, // SingleCharacter + SOS + State::FinishSequence, // SingleCharacter + SCI + State::FinishSequence, // 
SingleCharacter + CSI + State::FinishSequence, // SingleCharacter + ST + State::FinishSequence, // SingleCharacter + ICF + State::Ground, // SingleCharacter + DEL + State::Ground, // SingleCharacter + PAD + + State::ControlSequenceError, // ControlSequence + C0 + State::ControlSequenceError, // ControlSequence + C0S + State::ControlSequenceError, // ControlSequence + ESC + State::ControlSequencePushIntermediate, // ControlSequence + INT + State::ControlSequenceAddParameter, // ControlSequence + PAR + State::ControlSequencePushParameter, // ControlSequence + SEP + State::PrivateControlSequence, // ControlSequence + PRI + State::FinishControlSequence, // ControlSequence + C1 + State::FinishControlSequence, // ControlSequence + CSO + State::FinishControlSequence, // ControlSequence + SOS + State::FinishControlSequence, // ControlSequence + SCI + State::FinishControlSequence, // ControlSequence + CSI + State::FinishControlSequence, // ControlSequence + ST + State::FinishControlSequence, // ControlSequence + ICF + State::ControlSequenceError, // ControlSequence + DEL + State::ControlSequenceError, // ControlSequence + PAD + + State::ControlSequenceError, // ControlSequenceParameter + C0 + State::ControlSequenceError, // ControlSequenceParameter + C0S + State::ControlSequenceError, // ControlSequenceParameter + ESC + State::ControlSequenceParameterIntermediate, // ControlSequenceParameter + INT + State::ControlSequenceAddParameter, // ControlSequenceParameter + PAR + State::ControlSequencePushParameter, // ControlSequenceParameter + SEP + State::ControlSequenceError, // ControlSequenceParameter + PRI + State::FinishControlSequence, // ControlSequenceParameter + C1 + State::FinishControlSequence, // ControlSequenceParameter + CSO + State::FinishControlSequence, // ControlSequenceParameter + SOS + State::FinishControlSequence, // ControlSequenceParameter + SCI + State::FinishControlSequence, // ControlSequenceParameter + CSI + State::FinishControlSequence, // 
ControlSequenceParameter + ST + State::FinishControlSequence, // ControlSequenceParameter + ICF + State::ControlSequenceError, // ControlSequenceParameter + DEL + State::ControlSequenceError, // ControlSequenceParameter + PAD + + State::ControlSequenceError, // ControlSequenceIntermediate + C0 + State::ControlSequenceError, // ControlSequenceIntermediate + C0S + State::ControlSequenceError, // ControlSequenceIntermediate + ESC + State::ControlSequencePushIntermediate, // ControlSequenceIntermediate + INT + State::ControlSequenceError, // ControlSequenceIntermediate + PAR + State::ControlSequenceError, // ControlSequenceIntermediate + SEP + State::ControlSequenceError, // ControlSequenceIntermediate + PRI + State::FinishControlSequence, // ControlSequenceIntermediate + C1 + State::FinishControlSequence, // ControlSequenceIntermediate + CSO + State::FinishControlSequence, // ControlSequenceIntermediate + SOS + State::FinishControlSequence, // ControlSequenceIntermediate + SCI + State::FinishControlSequence, // ControlSequenceIntermediate + CSI + State::FinishControlSequence, // ControlSequenceIntermediate + ST + State::FinishControlSequence, // ControlSequenceIntermediate + ICF + State::ControlSequenceError, // ControlSequenceIntermediate + DEL + State::ControlSequenceError, // ControlSequenceIntermediate + PAD + + State::ControlSequenceError, // ControlSequenceError + C0 + State::ControlSequenceError, // ControlSequenceError + C0S + State::ControlSequenceError, // ControlSequenceError + ESC + State::ControlSequenceError, // ControlSequenceError + INT + State::ControlSequenceError, // ControlSequenceError + PAR + State::ControlSequenceError, // ControlSequenceError + SEP + State::ControlSequenceError, // ControlSequenceError + PRI + State::Ground, // ControlSequenceError + C1 + State::Ground, // ControlSequenceError + CSO + State::Ground, // ControlSequenceError + SOS + State::Ground, // ControlSequenceError + SCI + State::Ground, // ControlSequenceError + CSI + 
State::Ground, // ControlSequenceError + ST + State::Ground, // ControlSequenceError + ICF + State::ControlSequenceError, // ControlSequenceError + DEL + State::ControlSequenceError, // ControlSequenceError + PAD +]; + +#[repr(u8)] +#[derive(Copy, Clone)] +enum Class { + /// C0 Control Functions + /// + /// 00..1F + C0, + + /// C0 Control Functions permitted in Control Strings + /// + /// 08..0D + C0S, + + /// ESCAPE + /// + /// 1B + ESC, + + /// Control Function / Control Sequence Intermediate Bytes + /// + /// 20..2F + INT, + + /// Control Sequence Parameter Bytes + /// + /// 30..39 + PAR, + + /// Control Sequence Parameter Separators + /// + /// 3A..3B + SEP, + + /// Control Sequence Private Parameter String Indicator + /// + /// 3C..3F + PRI, + + /// C1 Control Functions + /// + /// ESC 40..5F + C1, + + /// Command String Opening Delimiter + /// + /// ESC 50, ESC 5D..5F + CSO, + + /// Start Of String + /// + /// ESC 58 + SOS, + + /// Single Character Introducer + /// + /// ESC 5A + SCI, + + /// Control Sequence Introducer + /// + /// ESC 5B + CSI, + + /// String Terminator + /// + /// ESC 5C + ST, + + /// Independent Control Function Final Bytes + /// + /// 60..7E + ICF, + + /// DELETE + /// + /// 7F + DEL, +} + +#[repr(u8)] +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +enum State { + // All base states + + Ground = 0x00, + Escape = 0x10, + ControlFunction = 0x20, + ControlFunctionError = 0x30, + CommandString = 0x40, + CommandStringEscape = 0x50, + CharacterString = 0x60, + CharacterStringEscape = 0x70, + ControlStringError = 0x80, + SingleCharacter = 0x90, + ControlSequence = 0xA0, + ControlSequenceParameter = 0xB0, + ControlSequenceIntermediate = 0xC0, + ControlSequenceError = 0xD0, + + // All action states + // The upper 4 bits set a base state to return to (see above), + // the lower 4 bits set an action to perform (see below) + // Base states implicitly have Action::Continue.
+ + C0Control = + State::Ground as u8 | Action::C01Control as u8, + + Char = + State::Ground as u8 | Action::Char as u8, + + StartEscape = + State::Escape as u8 | Action::StartSequence as u8, + + PushIntermediateByte = + State::ControlFunction as u8 | Action::PushByte as u8, + + C1Control = + State::Escape as u8 | Action::C01Control as u8, + + FinishSequence = + State::Ground as u8 | Action::FinishSequence as u8, + + StartCommandString = + State::CommandString as u8 | Action::StartSequence as u8, + + StartCharacterString = + State::CharacterString as u8 | Action::StartSequence as u8, + + StartSingleCharacter = + State::SingleCharacter as u8 | Action::StartSequence as u8, + + StartControlSequence = + State::ControlSequence as u8 | Action::StartSequence as u8, + + PushCommandString = + State::CommandString as u8 | Action::PushByte as u8, + + PushCharacterString = + State::CharacterString as u8 | Action::PushByte as u8, + + PushCharacterStringEscape = + State::CharacterString as u8 | Action::PushByteWithEscape as u8, + + PrivateControlSequence = + State::ControlSequence as u8 | Action::SetPrivate as u8, + + ControlSequencePushParameter = + State::ControlSequence as u8 | Action::PushParam as u8, + + ControlSequenceAddParameter = + State::ControlSequenceParameter as u8 | Action::AddParamValue as u8, + + ControlSequenceParameterIntermediate = + State::ControlSequenceIntermediate as u8 | Action::PushParamAndByte as u8, + + ControlSequencePushIntermediate = + State::ControlSequenceIntermediate as u8 | Action::PushByte as u8, + + FinishControlSequence = + State::Ground as u8 | Action::PushParamAndEndSequence as u8, +} + +#[repr(u8)] +#[derive(Copy, Clone)] +enum Action { + // Variant is never constructed, but is matched on + #[allow(dead_code)] + /// Return Continue + Continue, + + /// Return Char + Char, + + /// Set `start`, return Control + C01Control, + + /// Set `start`, return Continue + StartSequence, + + /// Set `end`, return Control + FinishSequence, + + /// Push 
`byte`, return Continue + PushByte, + + /// Push `Escape`, push `byte`, return Continue + PushByteWithEscape, + + /// Set `private`, return Continue + SetPrivate, + + /// Add to parameter value, return Continue + AddParamValue, + + /// Push `param`, return Continue + PushParam, + + /// Push `param`, push `byte`, return Continue + PushParamAndByte, + + /// Push `param`, set `end`, return Control + PushParamAndEndSequence, +} + +impl State { + /// Decomposes a state into base state and parser action. + fn decompose(self) -> (State, Action) { + use std::mem::transmute as cast; + + unsafe { + (cast(self as u8 & 0xF0), cast(self as u8 & 0x0F)) + } + } + + /// Poisons the state + fn poison(&mut self) { + *self = STATE_TABLE[*self as usize + 0xF]; + } +} + +impl Default for State { + fn default() -> State { + State::Ground + } +} + + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +/// A control sequence parameter. +pub enum Parameter { + /// A for the control sequence appropriate default value should be used. + Default, + /// The parameter has a value. + Value(u16) +} + +impl Parameter { + pub fn new(v: u16) -> Self { + Self::Value(v) + } + + /// Returns the value of the parameter, if present, otherwise returns the given default. + pub fn value_or(&self, or: u16) -> u16 { + match self { + Self::Default => or, + Self::Value(v) => *v, + } + } + + /// Parsing parameters requires an [`atoi`]-like loop. + /// + /// Parameter value overflow causes the sequence to be rejected. + /// + /// [`atoi`]: https://en.cppreference.com/w/c/string/byte/atoi + pub fn add(&mut self, x: u16) -> bool { + match self { + Self::Default => { + *self = Self::Value(x); + false + }, + + Self::Value(v) => { + let (v2, oflw) = v.overflowing_add(x); + *v = v2; + oflw + } + } + } +} + +impl Default for Parameter { + fn default() -> Self { + Self::Default + } +} + +#[derive(Clone, Eq, PartialEq, Debug, Default)] +pub struct ControlFunction { + /// The start of the control function. 
/// A parsed control function, filled in incrementally by the parser and
/// handed out by reference through [`TerminalInput::Control`].
#[derive(Clone, Eq, PartialEq, Debug, Default)]
pub struct ControlFunction {
    /// The start of the control function.
    ///
    /// For C0 and C1 controls, which are only 1 byte,
    /// this is the only necessary field.
    start: u8,
    /// Whether this control sequence has a private parameter string.
    private: bool,
    /// The parameters of the control sequence, if it is one.
    params: Vec<Parameter>,
    /// If this function is a control string,
    /// this is the string's content.
    ///
    /// Otherwise, it's the intermediate bytes of the function.
    /// For control sequences with private parameters, this contains the raw parameter string.
    // NOTE(review): the visible parser only sets `private` on a private
    // indicator; whether the raw parameter bytes end up here depends on
    // the transition table - confirm.
    bytes: Vec<u8>,
    /// The final byte of the control function.
    ///
    /// For C0 and C1 controls, as well as control strings,
    /// this field is left unset.
    end: u8,
}

/// Result of feeding one byte to the parser.
#[derive(Clone, Eq, PartialEq, Debug)]
pub enum TerminalInput<'a> {
    /// The byte was consumed; nothing to report yet.
    Continue,
    /// A complete character was decoded.
    Char(char),
    // If the UTF-8 decoder had to synchronize, two characters have to be inserted
    SyncChar(char, char),
    // FIXME: Passing this by reference saves on allocations,
    // but currently requires that it is fully processed before parsing can continue.
    // For performance, it may be better to pass a clone by value, and use a queue to avoid
    // the input buffer getting clogged. Relevant for stuff that may take longer to evaluate,
    // like SIXEL strings.
    // Will require benchmarking though.
    Control(&'a ControlFunction),

    // A character to insert for a resynchronized UTF-8 decoder, followed by a
    // control function (see `SyncChar`).
    SyncControl(char, &'a ControlFunction)
}
+ utf8: utf8::UTF8Decoder +} + +impl TerminalInputParser { + pub fn new() -> Self { + Self::default() + } + + pub fn parse_byte(&mut self, byte: u8) -> TerminalInput { + if byte >= 0x80 { + if self.state != State::Ground { + self.state.poison(); + return TerminalInput::Continue; + } + + // UTF-8 here + match self.utf8.decode_byte(byte) { + utf8::DecodeState::Continue => TerminalInput::Continue, + utf8::DecodeState::Done(c) => TerminalInput::Char(c), + utf8::DecodeState::Error => TerminalInput::Char('\u{FFFD}'), + utf8::DecodeState::Rewind => { + // Recurse, but only once + let again = self.parse_byte(byte); + + match again { + TerminalInput::Continue => TerminalInput::Char('\u{FFFD}'), + TerminalInput::Char(c) => TerminalInput::SyncChar('\u{FFFD}', c), + TerminalInput::Control(ctl) => TerminalInput::SyncControl('\u{FFFD}', ctl), + // We can't hit UTF-8 Rewind from the base state, + // so we can never produce SyncChar or SyncControl here + _ => unsafe { std::hint::unreachable_unchecked() } + } + } + } + } else { + let class = CLASS_TABLE[byte as usize] as usize; + + let state = unsafe { + *STATE_TABLE.get_unchecked(self.state as usize + class) + }; + + let (base, action) = state.decompose(); + + self.state = base; + + match action { + Action::Continue => TerminalInput::Continue, + Action::Char => TerminalInput::Char(byte as char), + Action::C01Control => { + self.ctl.start = byte; + TerminalInput::Control(&self.ctl) + }, + + Action::StartSequence => { + self.ctl.start = byte; + self.ctl.params.clear(); + self.ctl.bytes.clear(); + TerminalInput::Continue + }, + + Action::FinishSequence => { + self.ctl.end = byte; + TerminalInput::Control(&self.ctl) + }, + + Action::PushByte => { + self.ctl.bytes.push(byte); + TerminalInput::Continue + }, + + Action::PushByteWithEscape => { + self.ctl.bytes.push(0x1B); + self.ctl.bytes.push(byte); + TerminalInput::Continue + }, + + Action::SetPrivate => { + self.ctl.private = true; + TerminalInput::Continue + }, + + 
Action::AddParamValue => { + let oflw = self.pacc.add(byte as u16 - 0x30); + + // You can theoretically do this if-less + // using something like state ^= (0x70 * oflw) + // It just turns the branch into a conditional move and a xor. + if oflw { + self.state = State::ControlSequenceError; + } + + TerminalInput::Continue + }, + + Action::PushParam => { + self.ctl.params.push(self.pacc); + self.pacc = Parameter::Default; + + TerminalInput::Continue + }, + + Action::PushParamAndByte => { + self.ctl.bytes.push(byte); + self.ctl.params.push(self.pacc); + self.pacc = Parameter::Default; + + TerminalInput::Continue + }, + + Action::PushParamAndEndSequence => { + self.ctl.params.push(self.pacc); + self.pacc = Parameter::Default; + self.ctl.end = byte; + TerminalInput::Control(&self.ctl) + } + } + } + } +} diff --git a/src/lib_old.rs b/src/lib_old.rs new file mode 100644 index 0000000..90bf7f4 --- /dev/null +++ b/src/lib_old.rs @@ -0,0 +1,424 @@ +pub mod recognizer; +pub mod v2; +mod utf8; + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +/// A control sequence parameter. +pub enum Parameter { + /// A for the control sequence appropriate default value should be used. + Default, + /// The parameter has a value. + Value(u16) +} + +impl Parameter { + pub fn new(v: u16) -> Self { + Self::Value(v) + } + + /// Returns the value of the parameter, if present, otherwise returns the given default. + pub fn value_or(&self, or: u16) -> u16 { + match self { + Self::Default => or, + Self::Value(v) => *v, + } + } + + /// Parsing parameters requires an [`atoi`]-like loop. + /// + /// Parameter value overflow causes the sequence to be rejected. 
// diff --git a/src/lib_old.rs b/src/lib_old.rs
// new file mode 100644
// index 0000000..90bf7f4
// --- /dev/null
// +++ b/src/lib_old.rs
// @@ -0,0 +1,424 @@
//
// Earlier draft of the parser. `parse_byte` is still `unimplemented!()` and
// the transition table further down is a placeholder.
pub mod recognizer;
pub mod v2;
mod utf8;

#[derive(Copy, Clone, Eq, PartialEq, Debug)]
/// A control sequence parameter.
pub enum Parameter {
    /// A for the control sequence appropriate default value should be used.
    Default,
    /// The parameter has a value.
    Value(u16)
}

impl Parameter {
    /// Creates a parameter holding the value `v`.
    pub fn new(v: u16) -> Self {
        Self::Value(v)
    }

    /// Returns the value of the parameter, if present, otherwise returns the given default.
    pub fn value_or(&self, or: u16) -> u16 {
        match self {
            Self::Default => or,
            Self::Value(v) => *v,
        }
    }

    /// Parsing parameters requires an [`atoi`]-like loop.
    ///
    /// Parameter value overflow causes the sequence to be rejected.
    ///
    /// Returns `true` when the addition overflowed.
    ///
    /// [`atoi`]: https://en.cppreference.com/w/c/string/byte/atoi
    // NOTE(review): this only adds `x` to the stored value; an atoi-style
    // accumulation would first multiply the stored value by 10.
    pub fn add(&mut self, x: u16) -> bool {
        match self {
            Self::Default => {
                *self = Self::Value(x);
                false
            },

            Self::Value(v) => {
                let (v2, oflw) = v.overflowing_add(x);
                *v = v2;
                oflw
            }
        }
    }
}

impl Default for Parameter {
    fn default() -> Self {
        Self::Default
    }
}

/// A parsed control function.
#[derive(Clone, Eq, PartialEq, Debug, Default)]
pub struct ControlFunction {
    /// The start of the control function.
    ///
    /// For C0 and C1 controls, which are only 1 byte,
    /// this is the only necessary field.
    start: u8,
    /// Whether this control sequence has a private parameter string.
    private: bool,
    /// The parameters of the control sequence, if it is one.
    params: Vec<Parameter>,
    /// If this function is a control string,
    /// this is the string's content.
    ///
    /// Otherwise, it's the intermediate bytes of the function.
    /// For control sequences with private parameters, this contains the raw parameter string.
    bytes: Vec<u8>,
    /// The final byte of the control function.
    ///
    /// For C0 and C1 controls, as well as control strings,
    /// this field is left unset.
    end: u8,
}

/// Result of feeding one byte to the parser.
#[derive(Clone, Eq, PartialEq, Debug)]
pub enum TerminalInput<'a> {
    Continue,
    Char(char),
    // FIXME: Passing this by reference saves on allocations,
    // but currently requires that it is fully processed before parsing can continue.
    // For performance, it may be better to pass a clone by value, and use a queue to avoid
    // the input buffer getting clogged. Relevant for stuff that may take longer to evaluate,
    // like SIXEL strings.
    // Will require benchmarking though.
    Control(&'a ControlFunction),
}

#[derive(Clone, Debug, Default)]
pub struct TerminalInputParser {
    /// The current parsing state.
    state: State,
    /// Container for parsed control function data.
    ctl: ControlFunction,
    /// Accumulator for current control sequence parameter.
    pacc: Parameter,
    // /// UTF-8 character decoder.
    // utf8: UTF8Decoder
}

impl TerminalInputParser {
    /// Creates a parser with all fields at their defaults.
    pub fn new() -> Self {
        Self::default()
    }

    /// Not implemented in this draft; always panics via `unimplemented!()`.
    pub fn parse_byte(&mut self, byte: u8) -> TerminalInput {

        unimplemented!()
    }
}



/// Character class of a 7-bit input byte; the element type of [`CLASS_TABLE`].
#[repr(u8)]
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum Class {
    /// C0 Control Functions
    ///
    /// 00..1F
    C0,

    /// C0 Control Functions permitted in Control Strings
    ///
    /// 08..0D
    C0S,

    /// ESCAPE
    ///
    /// 1B
    ESC,

    /// Control Function / Control Sequence Intermediate Bytes
    ///
    /// 20..2F
    INT,

    /// Control Sequence Parameter Bytes
    ///
    /// 30..39
    PAR,

    /// Control Sequence Parameter Separators
    ///
    /// 3A..3B
    SEP,

    /// Control Sequence Private Parameter String Indicator
    ///
    /// 3C..3F
    PRI,

    /// C1 Control Functions
    ///
    /// ESC 40..5F
    C1,

    /// Command String Opening Delimiter
    ///
    /// ESC 50, ESC 5D..5F
    CSO,

    /// Start Of String
    ///
    /// ESC 58
    SOS,

    /// Single Character Introducer
    ///
    /// ESC 5A
    SCI,

    /// Control Sequence Introducer
    ///
    /// ESC 5B
    CSI,

    /// String Terminator
    ///
    /// ESC 5C
    ST,

    /// Independent Control Function Final Bytes
    ///
    /// 60..7E
    ICF,

    /// DELETE
    ///
    /// 7F
    DEL,
}

use Class::*;

/// Byte to Class translation table
///
/// One entry per 7-bit byte value, 16 entries per row (row `n` covers bytes
/// `n * 0x10 ..= n * 0x10 + 0xF`).
const CLASS_TABLE: [Class; 128] = [
    C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,C0S,C0S,C0S,C0S,C0S,C0S,C0 ,C0 ,
    C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,C0 ,ESC,C0 ,C0 ,C0 ,C0 ,
    INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
    PAR,PAR,PAR,PAR,PAR,PAR,PAR,PAR,PAR,PAR,SEP,SEP,PRI,PRI,PRI,PRI,
    C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,
    CSO,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,C1 ,SOS,C1 ,SCI,CSI,ST ,CSO,CSO,CSO,
    ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,
    ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,ICF,DEL,
];
/// State + Class to State transition table
// NOTE(review): placeholder only - every entry is `State::OK`, and 185
// entries do not cover the 0x10-spaced base states declared below (up to 0xD0).
const STATE_TABLE: [State; 185] = [State::OK;185];

/// Side effect attached to a state transition; stored in the low nibble of a
/// [`State`] discriminant.
#[repr(u8)]
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum Action {
    Continue,
    ReturnChar,
    C01Control,
    StartSequence,
    FinishSequence,
    PushByte,
    SetPrivate,
    PushLastParam,
    PushParamAndByte,
    PushParam,
    AddParamValue,
}

#[allow(non_camel_case_types)]
#[repr(u8)]
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
// Byte-class shorthands used in the transition notes below:
// NCB: 20..7E
// CFF: 30..7E
// CSC: 08..0D,20..7E

// 11 unique actions
// return char
// return continue
// set start, return control
// set start, return continue
// set end, return control
// push byte, return continue
// set private, return continue
// push param, set end, return control
// push param, push byte, return continue
// push param, return continue
// add value, return continue

// always return to some base state, which is a multiple of 15
// for example, we will never match against state 61
// use modulo 15 arithmetic to encode 61 as "action 1, state 60"
// modulo 15 not ideal
// pad rows to 16 so we can use modulo 16 (easy bitmask)



enum State {
    /// Base state
    ///
    /// ```text,ignore
    /// C0  -> OK_C01 (set start, return control)
    /// ESC -> ESC    (set start, return continue)
    /// NCB -> OK_NCB (           return char)
    /// DEL -> OK     (           return continue)
    /// ```
    OK = 0x00,

    /// Received ESCAPE
    ///
    /// ```text,ignore
    /// C0  -> OK_C01 (set start, return control)
    /// INT -> CF     (push byte, return continue)
    /// CFF -> OK_CF  (set end,   return control)
    /// C1  -> OK_C01 (set start, return control)
    /// CSO -> CMS    (set start, return continue)
    /// SOS -> SOS    (set start, return continue)
    /// SCI -> SCI    (set start, return continue)
    /// CSI -> CSI    (set start, return continue)
    /// _   -> OK     (           return continue)
    /// ```
    ESC = 0x10,

    /// Control Function
    ///
    /// ```text,ignore
    /// INT -> CF     (push byte, return continue)
    /// CFF -> OK_CF  (set end,   return control)
    /// _   -> ERR_CF (           return continue)
    /// ```
    CF = 0x20,

    /// Poisoned Control Function
    ///
    /// ```text,ignore
    /// CFF -> OK     (return continue)
    /// _   -> ERR_CF (return continue)
    /// ```
    ERR_CF = 0x30,

    /// Command String
    ///
    /// ```text,ignore
    /// ESC -> CMS_ESC (           return continue)
    /// CSC -> CMS_ACC (push byte, return continue)
    /// _   -> ERR_CMS (           return continue)
    /// ```
    CMS = 0x40,

    /// Command String, Received ESCAPE
    ///
    /// ```text,ignore
    /// ST -> OK_CF   (set end, return control)
    /// _  -> ERR_CMS (         return continue)
    /// ```
    CMS_ESC = 0x50,

    /// Poisoned Command String
    ///
    /// ```text,ignore
    /// ESC -> CMS_ESC (return continue)
    /// _   -> ERR_CMS (return continue)
    /// ```
    ERR_CMS = 0x60,

    /// Start Of String
    ///
    /// ```text,ignore
    /// ESC -> SOS_ESC (           return continue)
    /// _   -> SOS_ACC (push byte, return continue)
    /// ```
    SOS = 0x70,

    /// Start Of String, Received ESCAPE
    ///
    /// ```text,ignore
    /// ST  -> OK_CF   (set end,   return control)
    /// SOS -> ERR_CMS (           return continue)
    /// _   -> SOS_ACC (push byte, return continue)
    /// ```
    SOS_ESC = 0x80,

    /// Single Character Introducer
    ///
    /// ```text,ignore
    /// CSC -> OK_CF (set end, return control)
    /// _   -> OK    (         return continue)
    /// ```
    SCI = 0x90,

    /// Control Sequence Introducer
    ///
    /// ```text,ignore
    /// PRI -> CSI_PRI (set private, return continue)
    /// PAR -> CSI_PAR (add value,   return continue)
    /// SEP -> CSI_SEP (push param,  return continue)
    /// INT -> CSI_INT (push byte,   return continue)
    /// CSF -> OK_CF   (set end,     return control)
    /// _   -> ERR_CSI (             return continue)
    /// ```
    CSI = 0xA0,

    /// Control Sequence Introducer, Received Parameter Byte
    ///
    /// ```text,ignore
    /// PAR -> CSI_PAR (add value,             return continue)
    /// SEP -> CSI_SEP (push param,            return continue)
    /// INT -> CSI_PIN (push param, push byte, return continue)
    /// CSF -> OK_CSI  (push param, set end,   return control)
    /// _   -> ERR_CSI (                       return continue)
    /// ```
    CSI_PAR = 0xB0,

    /// Control Sequence Introducer, Received Intermediate Byte
    ///
    /// ```text,ignore
    /// INT -> CSI_INT (push byte, return continue)
    /// CSF -> OK_CF   (set end,   return control)
    /// _   -> ERR_CSI (           return continue)
    /// ```
    CSI_INT = 0xC0,

    /// Poisoned Control Sequence Introducer
    ///
    /// ```text,ignore
    /// CSF -> OK      (return continue)
    /// _   -> ERR_CSI (return continue)
    /// ```
    ERR_CSI = 0xD0,

    // All states with an action.
    // Base states are implicitly Action::Continue.
    // NOTE(review): the notes above also mention `CSI_PAC`/`CSI_IAC` targets
    // as `CSI_PAR`/`CSI_INT`, and an `OK_CSI` state that is not declared here.
    OK_C01  = State::OK  as u8 | Action::C01Control       as u8,
    OK_NCB  = State::OK  as u8 | Action::ReturnChar       as u8,
    TR_ESC  = State::ESC as u8 | Action::StartSequence    as u8,
    TR_CF   = State::CF  as u8 | Action::PushByte         as u8,
    OK_CF   = State::OK  as u8 | Action::FinishSequence   as u8,
    TR_CMS  = State::CMS as u8 | Action::StartSequence    as u8,
    TR_SOS  = State::SOS as u8 | Action::StartSequence    as u8,
    TR_SCI  = State::SCI as u8 | Action::StartSequence    as u8,
    TR_CSI  = State::CSI as u8 | Action::StartSequence    as u8,
    CMS_ACC = State::CMS as u8 | Action::PushByte         as u8,
    SOS_ACC = State::SOS as u8 | Action::PushByte         as u8,
    CSI_PRI = State::CSI as u8 | Action::SetPrivate       as u8,
    CSI_SEP = State::CSI as u8 | Action::PushParam        as u8,
    CSI_PAC = State::CSI as u8 | Action::AddParamValue    as u8,
    CSI_IAC = State::CSI as u8 | Action::PushByte         as u8,
    CSI_PIN = State::CSI as u8 | Action::PushParamAndByte as u8,
}

impl State {
    /// Decomposes a state into base state and parser action.
    ///
    /// The upper nibble is reinterpreted as a base [`State`], the lower nibble
    /// as an [`Action`].
    fn decompose(self) -> (State, Action) {
        use std::mem::transmute as cast;

        // Relies on every discriminant being `<base state> | <Action>`.
        unsafe {
            (cast(self as u8 & 0xF0), cast(self as u8 & 0x0F))
        }
    }
}

impl Default for State {
    fn default() -> State {
        State::OK
    }
}

// diff --git a/src/recognizer.rs b/src/recognizer.rs
// new file mode 100644
// index 0000000..e69de29
// --- /dev/null
// +++ b/src/recognizer.rs
//
// diff --git a/src/utf8.rs b/src/utf8.rs
// new file mode 100644
// index 0000000..0a3ffc8
// --- /dev/null
// +++ b/src/utf8.rs
// @@ -0,0 +1,108 @@
/*
 * Copyright (c) 2008-2009 Bjoern Hoehrmann <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software
 * and associated documentation files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or
 * substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
 * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

// See https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for more information on how this works.

/// Decoder ground state.
const OK: u8 = 0;
/// Decoder error state.
const ER: u8 = 96;
/// Decoder error state.
Offending byte should be passed in again ("rewind") +const RW: u8 = 108; + +const UTF8_TABLE: [u8; 256+96] = [ + // Maps bytes to character classes + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 0x90 + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 0xA0 + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xC0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, // 0xE0 + 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 0xF0 + + // Maps state (row) + character class (column) to next state + OK, ER, 12, 24, 48, 84, 72, ER, ER, ER, 36, 60, // 0 - OK + RW, OK, RW, RW, RW, RW, RW, OK, RW, OK, RW, RW, // 12 - 1 byte needed + RW, 12, RW, RW, RW, RW, RW, 12, RW, 12, RW, RW, // 24 - 2 bytes needed + RW, RW, RW, RW, RW, RW, RW, 12, RW, RW, RW, RW, // 36 - 2 bytes needed, E0 lead + RW, 12, RW, RW, RW, RW, RW, RW, RW, 12, RW, RW, // 48 - 2 bytes needed, ED lead + RW, RW, RW, RW, RW, RW, RW, 24, RW, 24, RW, RW, // 60 - 3 bytes needed, F0 lead + RW, 24, RW, RW, RW, RW, RW, 24, RW, 24, RW, RW, // 72 - 3 bytes needed + RW, 24, RW, RW, RW, RW, RW, RW, RW, RW, RW, RW, // 84 - 3 bytes needed, F4 lead +]; + +#[derive(Copy, Clone, Debug, Default)] +pub struct UTF8Decoder { + code_point: u32, + state: u8 +} + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum DecodeState { + Done(char), + Continue, + Error, + Rewind +} + +impl UTF8Decoder { + #[inline] + pub fn reset(&mut self) { + self.state = 0; + } + + pub fn 
decode_byte(&mut self, byte: u8) -> DecodeState { + let class = UTF8_TABLE[byte as usize]; + + self.code_point = + if self.state == OK { + // The character class values for leading bytes simultaneously form a bitmask. + // For class 0, this is a no-op + // For classes > 7, this is 0 (continuations & invalid bytes) + (0xFF >> class) & byte as u32 + } else { + // Standard continuation byte extraction. + // It's okay if this is gibberish due to invalid input, + // errors reset state to OK, and code_point gets cleared on the next input. + (self.code_point << 6) | (byte as u32 & 0x3F) + }; + + unsafe { + // The compiler can't verify this access is always in bounds, but it is, I promise. + self.state = *UTF8_TABLE.get_unchecked(256 + self.state as usize + class as usize); + + match self.state { + // Surrogate or out of bounds code points will be rejected, so this is safe. + OK => DecodeState::Done(std::char::from_u32_unchecked(self.code_point)), + ER => { self.reset(); DecodeState::Error }, + RW => { self.reset(); DecodeState::Rewind }, + _ => DecodeState::Continue + } + } + } +} |