public class Token
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
static byte |
CAPS_ALL |
static byte |
CAPS_FIRST_ONLY |
static byte |
CAPS_MIXED |
static byte |
CAPS_NONE |
static byte |
CAPS_UNKNOWN |
private byte |
iv_caps |
private int |
iv_endOffset |
private boolean |
iv_isInteger |
private byte |
iv_numPosition |
private int |
iv_startOffset |
private java.lang.String |
iv_text |
private byte |
iv_type |
static byte |
NUM_FIRST |
static byte |
NUM_LAST |
static byte |
NUM_MIDDLE |
static byte |
NUM_NONE |
static byte |
TYPE_CONTRACTION
Contains contractions and possessives (since they cannot be
differentiated without context).
|
static byte |
TYPE_EOL
An EOL token is defined as a line feed or carriage return character.
|
static byte |
TYPE_NUMBER
A number token is defined as a consecutive series of digits.
|
static byte |
TYPE_PUNCT
A punctuation token is defined as a single character such as a
period, double quote, single quote, question mark, exclamation point,
hyphen (if not surrounded by word characters), etc.
|
static byte |
TYPE_SYMBOL
Characters @!#$%^&*?
|
static byte |
TYPE_UNKNOWN
The type is unknown.
|
static byte |
TYPE_WORD
A word token is defined as a consecutive series of word characters.
|
Constructor and Description |
---|
Token(int startOffset,
int endOffset)
Constructor
|
Modifier and Type | Method and Description |
---|---|
byte |
getCaps()
Gets the caps state of the token.
|
int |
getEndOffset()
Gets the end offset.
|
byte |
getNumPosition()
Gets the position of a number inside a Token.
|
int |
getStartOffset()
Gets the start offset.
|
java.lang.String |
getText() |
byte |
getType()
Gets the type of the token.
|
boolean |
isInteger() |
void |
setCaps(byte b)
Sets the caps state of the token.
|
void |
setEndOffset(int i)
Sets the end offset.
|
void |
setIsInteger(boolean isInteger) |
void |
setNumPosition(byte b)
Sets the position of a number inside a Token.
|
void |
setStartOffset(int i)
Sets the start offset.
|
void |
setText(java.lang.String s) |
void |
setType(byte b)
Sets the type of the token.
|
java.lang.String |
toString() |
static java.lang.String |
typeDescription(byte type) |
public static final byte TYPE_UNKNOWN
public static final byte TYPE_WORD
public static final byte TYPE_NUMBER
public static final byte TYPE_PUNCT
public static final byte TYPE_EOL
public static final byte TYPE_CONTRACTION
public static final byte TYPE_SYMBOL
public static final byte CAPS_UNKNOWN
public static final byte CAPS_NONE
public static final byte CAPS_MIXED
public static final byte CAPS_FIRST_ONLY
public static final byte CAPS_ALL
public static final byte NUM_NONE
public static final byte NUM_FIRST
public static final byte NUM_MIDDLE
public static final byte NUM_LAST
private byte iv_type
private byte iv_caps
private byte iv_numPosition
private int iv_startOffset
private int iv_endOffset
private java.lang.String iv_text
private boolean iv_isInteger
public Token(int startOffset, int endOffset)
Parameters:
startOffset - The token's start offset.
endOffset - The token's end offset.
public int getEndOffset()
public void setEndOffset(int i)
public int getStartOffset()
public void setStartOffset(int i)
public byte getType()
public void setType(byte b)
public byte getCaps()
public void setCaps(byte b)
public byte getNumPosition()
public void setNumPosition(byte b)
public java.lang.String getText()
public void setText(java.lang.String s)
public boolean isInteger()
public void setIsInteger(boolean isInteger)
public java.lang.String toString()
Overrides:
toString in class java.lang.Object
public static java.lang.String typeDescription(byte type)