//============================================================================
//
//  Name:   CS String Tokeniser - Main Include File
//  File:   cs_token
//  Author: Craig Smith (Galap)
//
//----------------------------------------------------------------------------
// This software is distributed in the hope that it will be useful. It is
// provided "as is" WITHOUT WARRANTY OF ANY KIND, either expressed or implied,
// including, but not limited to, the implied warranties of merchantability
// and fitness for a particular purpose. You may redistribute or modify this
// software for your own purposes so long as all original credit information
// remains intact.
//----------------------------------------------------------------------------
//
// Introduction
// ------------
// The functions in this file allow strings to be broken down into substrings
// at specific points, i.e. via a separator character such as an underscore.
// Such a facility allows data fields to be embedded into strings such as
// object or creature tags for easy retrieval.
//
// Using the CS String Tokeniser in your own code
// ----------------------------------------------
// To use the tokeniser, simply include this file into your own scripts by
// placing the following line near the top of the script file:
//
//     #include "cs_token"
//
// If you then scroll through the function list on the right of the toolset's
// code editor, you will see various functions beginning with a "cs_tkn"
// prefix.
//
// In general, there are two approaches to using a string tokeniser. One
// usually either knows the fields that are present within the string and
// so merely extracts them one after the other, or a loop is used to walk
// through the string until no more tokens are left.
//
// If you already know the number of tokens in the string, then a code
// fragment like this is likely to be the sort of thing you would use:
//
//     #include "cs_token"
//     void main() {
//         cs_tkn_SetTokenString("some_string_3");
//         string firstToken = cs_tkn_GetNextToken();
//         string secondToken = cs_tkn_GetNextToken();
//         int thirdToken = StringToInt(cs_tkn_GetNextToken());
//         // .. now do something with the values returned..
//     }
//
// At the end of this code fragment, the string variable 'firstToken' will
// hold a value of "some", 'secondToken' will hold "string" and the integer
// variable 'thirdToken' will have the value 3. Note that you must convert
// numeric token values to integers manually as the tokeniser only returns
// strings. Looping through the tokens in the string is only slightly more
// complex:
//
//     #include "cs_token"
//     void main() {
//         cs_tkn_SetTokenString("another_string_of_tokens");
//         while(!cs_tkn_GetIsLastToken()) {
//             string token = cs_tkn_GetNextToken();
//             // .. do something with the string 'token' ..
//         }
//     }
//
// If you wish to use a character other than an underscore as the separator
// between tokens, pass a single character string as the second parameter
// to the cs_tkn_SetTokenString() function like this:
//
//     cs_tkn_SetTokenString("some:string:3", ":");
//
// Note that you cannot nest calls to the tokeniser within nested loops or
// within function calls, because the tokeniser keeps its state in a single
// set of global variables. Bit of a bugger, that, and I might fix it one day
// if I need it.
//
// Revision History
// ----------------
// 0.4.0dev  Refactored cs_tkn_GetFirstToken to cs_tkn_SetTokenString.
// 0.3.0dev  Added cs_tkn_GetRemainingTokens().
// 0.2.0dev  Consolidation of all tokeniser functions in one place.
// 0.1.0dev  Various functions in numerous places.
//
//============================================================================

#include "cs_dbg"

// The version numbering I use is comprised of a tripartite version number
// and a trailing string.
// The version is arranged into 1.2.3 form, where 1 is
// the major release number, 2 is the minor release number and 3 is the bugfix
// level. Each number in the version starts at 0 and increments each time a
// change is made to the code that classifies as that level. The trailing
// string is either "dev", "beta" or "final" depending upon whether the code
// is still under development (it will have bugs, count on it), is considered
// to be beta quality (i.e. technically finished but probably has bugs), or
// finished, tested and believed to be correctly functional. If you wish to
// report a bug, please include the version number with your bug report.
string cs_tkn_version = "0.4.0dev";

//============================================================================
//
//  Data Types and Constants
//
//============================================================================

// NOTE: everything below is shared, mutable tokeniser state. There is only
// one set of these variables, so only one string can be tokenised at a time.

// The delimiter used to break down the string (a single character string).
string cs_tkn_tokenDelimiter = "_";

// The string we're tokenising.
string cs_tkn_tokenString = "";

// The length of the string being tokenised, cached when the string is set
// (to avoid multiple function calls).
int cs_tkn_tokenLen;

// Index into cs_tkn_tokenString at which the next token starts, i.e. one
// past the delimiter that ended the most recently extracted token.
int cs_tkn_tokenPos;

// TRUE when the end of the string is reached and there are no more tokens.
int cs_tkn_tokenLast;

//============================================================================
//
//  Function Prototypes and toolset IDE documentation.
//
//============================================================================

// Determine whether there are any more tokens in the current string. This
// function is intended primarily to be used as the controlling conditional
// for loop constructs.
//
// returns  TRUE if there are no more tokens, otherwise FALSE.
int cs_tkn_GetIsLastToken();

// Return all remaining tokens in the string. After this function is called,
// the string is considered exhausted and the cs_tkn_GetIsLastToken() function
// will return TRUE.
//
// returns  The entirety of the remaining tokens in the string as a single
//          string with all delimiters in place.
string cs_tkn_GetRemainingTokens();

// Extract the next token from the string most recently passed to the
// cs_tkn_SetTokenString() function. Note that it is possible for a value
// of "" (i.e. an empty string) to be returned as a valid token if two
// separator characters occur next to each other, or if the string ends
// with a separator character. Use the function cs_tkn_GetIsLastToken()
// to determine when the last token has been reached.
//
// returns  A string of the value of the next token, or "" if there are
//          no more tokens.
string cs_tkn_GetNextToken();

// Initialise the tokeniser with a new string. Any tokenising in progress on
// a previous string is abandoned. An empty string contains no tokens, so
// cs_tkn_GetIsLastToken() returns TRUE straight away for it.
//
// tokenString  The string to be broken down into tokens
// delimiter    The separator character between tokens (a single character
//              string). If not specified, the separator character defaults
//              to an underscore.
void cs_tkn_SetTokenString(string tokenString, string delimiter="_");

// Determine the number of tokens in the string most recently initialised.
// The count is for the whole string, regardless of how many tokens have
// already been extracted. An empty string has no tokens.
//
// returns  The number of tokens in the current token string.
int cs_tkn_GetNumberOfTokens();

//----------------------------------------------------------------------------
// Report the "no more tokens" flag of the shared tokeniser state.
int cs_tkn_GetIsLastToken() {
    cs_dbg_Trace("cs_tkn_GetIsLastToken(): " + IntToString(cs_tkn_tokenLast));
    return cs_tkn_tokenLast;
}

//----------------------------------------------------------------------------
// Return everything from the current position to the end of the string and
// mark the string as exhausted.
string cs_tkn_GetRemainingTokens() {
    cs_dbg_Enter("cs_tkn_GetRemainingTokens()");
    string token = "";

    cs_dbg_Trace(
        "before: " + IntToString(cs_tkn_tokenPos) + ", " +
        IntToString(cs_tkn_tokenLast)
    );

    // Only do anything if we have not already reached the last token.
    if (!cs_tkn_tokenLast) {
        // Extract the remainder of the string, delimiters included.
        token = GetSubString(
            cs_tkn_tokenString,
            cs_tkn_tokenPos,
            cs_tkn_tokenLen - cs_tkn_tokenPos
        );
        cs_tkn_tokenPos = cs_tkn_tokenLen;

        // No more tokens left now.
        cs_tkn_tokenLast = TRUE;
    }

    cs_dbg_Exit("cs_tkn_GetRemainingTokens");
    return token;
}

//----------------------------------------------------------------------------
// Return the token starting at the current position and advance the position
// past the delimiter that terminates it.
string cs_tkn_GetNextToken() {
    cs_dbg_Enter("cs_tkn_GetNextToken()");
    string token = "";

    cs_dbg_Trace(
        "before: " + IntToString(cs_tkn_tokenPos) + ", " +
        IntToString(cs_tkn_tokenLast)
    );

    // Only do anything if we have not already reached the last token.
    if (!cs_tkn_tokenLast) {
        int lastPos = cs_tkn_tokenPos;

        // Search until we find the end of the string or the next delimiter.
        while ((cs_tkn_tokenPos < cs_tkn_tokenLen) &&
               (GetSubString(cs_tkn_tokenString, cs_tkn_tokenPos, 1)
                    != cs_tkn_tokenDelimiter)) {
            cs_tkn_tokenPos++;
        }

        // Determine if we're at the end of the string. A delimiter in the
        // very last position does NOT set the flag: the empty token that
        // follows it is returned by the next call.
        if (cs_tkn_tokenPos >= cs_tkn_tokenLen) {
            cs_tkn_tokenLast = TRUE;
        }

        // Extract the token (the delimiter itself is not included).
        token = GetSubString(
            cs_tkn_tokenString,
            lastPos,
            cs_tkn_tokenPos - lastPos
        );

        // Step over the delimiter so the next call starts on the next token.
        cs_tkn_tokenPos++;
    }

    cs_dbg_Trace(
        "after: " + IntToString(cs_tkn_tokenPos) + ", " +
        IntToString(cs_tkn_tokenLast)
    );

    cs_dbg_Exit("cs_tkn_GetNextToken");
    return token;
}

//----------------------------------------------------------------------------
// Reset the shared tokeniser state so that it points at the start of a new
// string.
void cs_tkn_SetTokenString(string tokenString, string delimiter="_") {
    // The debug labels name this function; they previously still carried the
    // pre-0.4.0 name cs_tkn_GetFirstToken.
    cs_dbg_Enter("cs_tkn_SetTokenString(" + tokenString + ")");

    // Initialise the token string.
    cs_tkn_tokenString = tokenString;
    cs_tkn_tokenLen = GetStringLength(tokenString);
    cs_tkn_tokenPos = 0;
    cs_tkn_tokenDelimiter = delimiter;

    // An empty string holds no tokens (cs_tkn_GetNumberOfTokens() reports 0
    // for it), so flag it as exhausted immediately. Otherwise a
    // while(!cs_tkn_GetIsLastToken()) loop would process one bogus "" token.
    cs_tkn_tokenLast = (cs_tkn_tokenLen <= 0);

    cs_dbg_Trace(
        "initial: " + cs_tkn_tokenString + ", " +
        IntToString(cs_tkn_tokenLen) + ", " +
        cs_tkn_tokenDelimiter + ", " +
        IntToString(cs_tkn_tokenPos) + ", " +
        IntToString(cs_tkn_tokenLast)
    );

    cs_dbg_Exit("cs_tkn_SetTokenString");
}

//----------------------------------------------------------------------------
// Count the tokens in the whole current string: one more than the number of
// delimiter characters, or zero if the string is empty.
int cs_tkn_GetNumberOfTokens() {
    int count = (cs_tkn_tokenLen > 0 ? 1 : 0);
    int pos = 0;

    while (pos < cs_tkn_tokenLen) {
        if (GetSubString(cs_tkn_tokenString, pos, 1) == cs_tkn_tokenDelimiter) {
            count++;
        }
        pos++;
    }

    cs_dbg_Trace("cs_tkn_GetNumberOfTokens: " + IntToString(count));
    return count;
}