com.ibm.icu.text.UTF16.isTrailSurrogate()方法的使用及代码示例

x33g5p2x  于2022-02-01 转载在 其他  
字(12.3k)|赞(0)|评价(0)|浏览(136)

本文整理了Java中com.ibm.icu.text.UTF16.isTrailSurrogate()方法的一些代码示例,展示了UTF16.isTrailSurrogate()的具体用法。这些代码示例主要来源于GitHub/Stack Overflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考价值,能在一定程度上帮到你。UTF16.isTrailSurrogate()方法的具体详情如下:
包路径:com.ibm.icu.text.UTF16
类名称:UTF16
方法名:isTrailSurrogate

UTF16.isTrailSurrogate介绍

[英]Determines whether the character is a trail surrogate.
[中]判断该字符是否为尾随代理项(trail surrogate,即低位代理项)。

代码示例

代码示例来源:origin: com.ibm.icu/icu4j-charset

/**
 * Tries to complete the pending lead surrogate held in {@code c} with the next
 * code unit of {@code source}.
 *
 * <p>If the next unit is a trail surrogate it is consumed and {@code c} becomes the
 * combined code point. If the next unit is anything else, nothing is consumed and
 * {@code c} is left unchanged. If there is no more input, {@code c} is negated and
 * {@code checkNegative} is set so the pending lead is flagged as incomplete.
 *
 * @param source  input buffer; its position advances by one when a trail is consumed
 * @param target  not used by this method
 * @param offsets not used by this method
 * @return always {@code regularLoop}
 */
private int getTrail(CharBuffer source, ByteBuffer target, IntBuffer offsets){
  if(!source.hasRemaining()){
    /* no more input: a negative lead surrogate marks "incomplete" so c=0 is not needed elsewhere */
    c = -c;
    checkNegative = true;
  } else {
    /* peek at the following code unit without consuming it yet */
    final int at = source.position();
    final char candidate = source.get(at);
    if(UTF16.isTrailSurrogate(candidate)){
      source.position(at + 1);
      ++nextSourceIndex;
      c = UCharacter.getCodePoint((char)c, candidate);
    }
  }
  LoopAfterTrail = true;
  return regularLoop;
}

代码示例来源:origin: io.virtdata/virtdata-lib-realer

/**
 * Tests whether {@code set} contains the code point that starts at {@code start}.
 *
 * <p>A lead surrogate followed by a trail surrogate is treated as one supplementary
 * code point of length 2; the second unit is only inspected when {@code length >= 2}.
 * Anything else is treated as a single code unit of length 1.
 *
 * @param set    the set to test against
 * @param s      the text
 * @param start  index of the first code unit to examine
 * @param length presumably the number of code units available from {@code start};
 *               the code only uses it to decide whether {@code start + 1} may be read
 * @return the code point's length (1 or 2) if it is contained, otherwise the negated length
 */
static int spanOne(final UnicodeSet set, CharSequence s, int start, int length) {
  final char first = s.charAt(start);
  final boolean isLead = first >= 0xd800 && first <= 0xdbff;
  if (isLead && length >= 2) {
    final char second = s.charAt(start + 1);
    if (com.ibm.icu.text.UTF16.isTrailSurrogate(second)) {
      final int supplementary = Character.toCodePoint(first, second);
      if (set.contains(supplementary)) {
        return 2;
      }
      return -2;
    }
  }
  // BMP code unit, or a lead surrogate without a matching trail
  if (set.contains(first)) {
    return 1;
  }
  return -1;
}

代码示例来源:origin: io.virtdata/virtdata-lib-realer

/**
 * Builds the reverse of a UTF16 format Unicode string. Surrogate pairs are kept in
 * lead-then-trail order, instead of blindly reversing every code unit.
 * <p>
 * The reversed text is written to a newly allocated StringBuffer; this method performs
 * no write on {@code source}.
 * <p>
 * Examples:<br>
 * UTF16.reverse(new StringBuffer( "Supplementary characters \ud800\udc00\ud801\udc01"))<br>
 * returns "\ud801\udc01\ud800\udc00 sretcarahc yratnemelppuS".
 *
 * @param source The source StringBuffer that contains UTF16 format Unicode string to be reversed
 * @return a new StringBuffer holding the reversed UTF16 format Unicode string.
 * @stable ICU 2.6
 */
public static StringBuffer reverse(StringBuffer source) {
  final int total = source.length();
  final StringBuffer reversed = new StringBuffer(total);
  int pos = total - 1;
  while (pos >= 0) {
    final char unit = source.charAt(pos);
    // a pair ends here only if this is a trail with a lead immediately before it
    final boolean pairEndsHere =
        isTrailSurrogate(unit) && pos > 0 && isLeadSurrogate(source.charAt(pos - 1));
    if (pairEndsHere) {
      reversed.append(source.charAt(pos - 1)).append(unit);
      pos -= 2;
    } else {
      reversed.append(unit);
      pos -= 1;
    }
  }
  return reversed;
}

代码示例来源:origin: org.eclipse/org.eclipse.jem.util

/**
 * Steps the position back and returns the character found there.
 *
 * <p>The position is first moved back by one code unit. If that unit is a trail
 * surrogate and is not at index 0, the position moves back once more and the value of
 * {@code utfCharAt} at that index is returned; otherwise the single code unit is returned.
 * {@code lastCharIndex} is set to the final position.
 *
 * <p>NOTE(review): the unit before a trail surrogate is not checked for being a lead
 * surrogate here; the result in that case depends on {@code utfCharAt} - confirm.
 *
 * @return the character at the new position
 * @throws IllegalStateException if {@code hasPrevious()} is false
 *
 * @since 1.2.0
 */
public int previous() {
  if (!hasPrevious()) {
    throw new IllegalStateException();
  }
  pos--;
  int result;
  if (UTF16.isTrailSurrogate(charSeq.charAt(pos)) && pos > 0) {
    // step onto the unit in front of the trail and read from there
    pos--;
    result = utfCharAt(pos);
  } else {
    result = charSeq.charAt(pos);
  }
  lastCharIndex = pos;
  return result;
}

代码示例来源:origin: io.virtdata/virtdata-lib-realer

ch = source[result];
if (isLeadSurrogate(ch) && ((result + 1) < limit)
    && isTrailSurrogate(source[result + 1])) {
  result++;

代码示例来源:origin: com.ibm.icu/icu4j-charset

/**
 * Tries to pair the pending lead surrogate in {@code c} with the next code unit of
 * {@code source}.
 *
 * <p>{@code lead} is always set from {@code c}. If the next unit is a trail surrogate it is
 * consumed and {@code c} becomes the combined code point. If it is not, {@code cr} is set to
 * a malformed-input result of length 1. If the input is exhausted, nothing else changes.
 * {@code AfterGetTrail} is set before returning.
 *
 * @param source  input buffer; its position advances by one when a trail is consumed
 * @param target  not used by this method
 * @param offsets not used by this method
 * @return {@code Loop} when a surrogate pair was combined, otherwise {@code EndLoop}
 */
private int getTrail(CharBuffer source, ByteBuffer target, IntBuffer offsets){
  lead = (char)c;
  int nextState;
  if(!source.hasRemaining()){
    /* no more input */
    nextState = EndLoop;
  } else {
    /* peek at the following code unit */
    final int at = source.position();
    trail = source.get(at);
    if(UTF16.isTrailSurrogate(trail)){
      source.position(at + 1);
      ++nextSourceIndex;
      c = UCharacter.getCodePoint((char)c, trail);
      nextState = Loop;
    } else {
      /* unmatched lead code unit (1st surrogate): report as illegal input */
      cr = CoderResult.malformedForLength(1);
      nextState = EndLoop;
    }
  }
  AfterGetTrail = true;
  return nextState;
}

代码示例来源:origin: io.virtdata/virtdata-lib-realer

/**
* Looks up the value associated with a pair of surrogates.
* @param lead a lead surrogate
* @param trail a trail surrogate
* @return the data entry at the pair's offset, or the initial value when
*         the offset computed for the pair is not positive
* @throws IllegalArgumentException if lead is not a lead surrogate or
*         trail is not a trail surrogate
*/
public final int getSurrogateValue(char lead, char trail)
{
  final boolean validPair =
      UTF16.isLeadSurrogate(lead) && UTF16.isTrailSurrogate(trail);
  if (!validPair) {
    throw new IllegalArgumentException(
      "Argument characters do not form a supplementary character");
  }
  // position of the folded lead/trail units in the data array
  final int offset = getSurrogateOffset(lead, trail);
  if (offset <= 0) {
    // non-positive offset: fall back to the initial value
    return m_initialValue_;
  }
  return m_data_[offset];
}

代码示例来源:origin: io.virtdata/virtdata-lib-realer

/**
 * Replaces the code point at a UTF16 position with {@code char32}. When the position lies
 * on either half of a surrogate pair the whole pair is replaced, so the buffer length may
 * change if a non-supplementary code point replaces a supplementary one or vice versa.
 *
 * @param target Stringbuffer
 * @param offset16 UTF16 position to insert into
 * @param char32 Code point
 * @stable ICU 2.1
 */
public static void setCharAt(StringBuffer target, int offset16, int char32) {
  final char unit = target.charAt(offset16);
  int begin = offset16;
  int width = 1;
  if (isSurrogate(unit)) {
    final boolean leadOfPair = isLeadSurrogate(unit)
        && (target.length() > offset16 + 1)
        && isTrailSurrogate(target.charAt(offset16 + 1));
    if (leadOfPair) {
      // offset16 is on the lead: the pair extends one unit to the right
      width = 2;
    } else if (isTrailSurrogate(unit) && (offset16 > 0)
        && isLeadSurrogate(target.charAt(offset16 - 1))) {
      // offset16 is on the trail: the pair starts one unit to the left
      begin = offset16 - 1;
      width = 2;
    }
  }
  target.replace(begin, begin + width, valueOf(char32));
}

代码示例来源:origin: com.ibm.icu/icu4j-charset

/**
 * Examines the code unit at {@code x.sourceArrayIndex} that follows a pending lead
 * surrogate held in {@code x.c}.
 *
 * <p>If it is a trail surrogate, it is consumed, {@code x.c} becomes the combined code point
 * and {@code cr[0]} is set to an unmappable result of length 2. If it is any other unit,
 * {@code cr[0]} is set to a malformed result of length 1. If the index is at or past the
 * buffer limit, nothing is changed.
 *
 * @param source input buffer, read by absolute index
 * @param x      conversion state; {@code sourceArrayIndex} and {@code c} may be updated
 * @param cr     single-element holder that receives the coder result
 * @return always {@code false}
 */
private final boolean getTrailSingleBMP(CharBuffer source, SideEffectsSingleBMP x, CoderResult[] cr) {
  if (x.sourceArrayIndex >= source.limit()) {
    /* no more input */
    return false;
  }
  /* test the following code unit */
  final char trail = source.get(x.sourceArrayIndex);
  if (!UTF16.isTrailSurrogate(trail)) {
    /* unmatched lead code unit (1st surrogate): callback(illegal) */
    cr[0] = CoderResult.malformedForLength(1);
    return false;
  }
  ++x.sourceArrayIndex;
  x.c = UCharacter.getCodePoint((char) x.c, trail);
  /* this codepage does not map supplementary code points: callback(unassigned) */
  cr[0] = CoderResult.unmappableForLength(2);
  return false;
}

代码示例来源:origin: io.virtdata/virtdata-lib-realer

/**
 * Converts a UTF-32 (code point) offset into the matching UTF-16 (code unit) offset. Used
 * for random access. See the {@link UTF16 class description} for notes on roundtripping.
 *
 * @param source The UTF-16 string
 * @param offset32 UTF-32 offset
 * @return UTF-16 offset
 * @exception IndexOutOfBoundsException If offset32 is out of bounds.
 * @stable ICU 2.1
 */
public static int findOffsetFromCodePoint(String source, int offset32) {
  final int size = source.length();
  if (offset32 < 0 || offset32 > size) {
    throw new StringIndexOutOfBoundsException(offset32);
  }
  int offset16 = 0;
  int remaining = offset32;
  for (; offset16 < size && remaining > 0; remaining--) {
    // a lead immediately followed by a trail counts as one code point of two units
    final boolean pair = isLeadSurrogate(source.charAt(offset16))
        && ((offset16 + 1) < size)
        && isTrailSurrogate(source.charAt(offset16 + 1));
    offset16 += pair ? 2 : 1;
  }
  if (remaining != 0) {
    // the text ran out before offset32 code points were skipped
    throw new StringIndexOutOfBoundsException(offset32);
  }
  return offset16;
}

代码示例来源:origin: io.virtdata/virtdata-lib-realer

/**
 * Converts a UTF-32 (code point) offset into the matching UTF-16 (code unit) offset. Used
 * for random access. See the {@link UTF16 class description} for notes on roundtripping.
 *
 * @param source The UTF-16 string buffer
 * @param offset32 UTF-32 offset
 * @return UTF-16 offset
 * @exception IndexOutOfBoundsException If offset32 is out of bounds.
 * @stable ICU 2.1
 */
public static int findOffsetFromCodePoint(StringBuffer source, int offset32) {
  final int limit = source.length();
  if (offset32 < 0 || offset32 > limit) {
    throw new StringIndexOutOfBoundsException(offset32);
  }
  int unitIndex = 0;
  int pointsLeft = offset32;
  while (pointsLeft > 0 && unitIndex < limit) {
    final int following = unitIndex + 1;
    if (isLeadSurrogate(source.charAt(unitIndex)) && following < limit
        && isTrailSurrogate(source.charAt(following))) {
      // surrogate pair: skip its trail as part of the same code point
      unitIndex = following;
    }
    unitIndex++;
    pointsLeft--;
  }
  if (pointsLeft != 0) {
    // the buffer ran out before offset32 code points were skipped
    throw new StringIndexOutOfBoundsException(offset32);
  }
  return unitIndex;
}

代码示例来源:origin: com.ibm.icu/icu4j-charset

/**
 * Examines the code unit at {@code x.sourceArrayIndex} that follows a pending lead
 * surrogate held in {@code x.c}.
 *
 * <p>If it is a trail surrogate, it is consumed (both index counters advance) and
 * {@code x.c} becomes the combined code point. Then, when {@code uniMask} lacks
 * {@code UConverterConstants.HAS_SUPPLEMENTARY}, {@code x.doread} is set to {@code true}
 * and the result of {@code unassignedDouble} is returned; otherwise {@code x.doread} is
 * set to {@code false} and {@code true} is returned. If the unit is not a trail surrogate,
 * {@code cr[0]} is set to a malformed result of length 1. If the index is at or past the
 * buffer limit, nothing is changed.
 *
 * @param source  input buffer, read by absolute index
 * @param target  output buffer, only passed on to {@code unassignedDouble}
 * @param uniMask bit mask tested against {@code UConverterConstants.HAS_SUPPLEMENTARY}
 * @param x       conversion state updated as described above
 * @param flush   only passed on to {@code unassignedDouble}
 * @param cr      single-element holder that receives the coder result
 * @return {@code true} when a supplementary code point is ready in {@code x.c}; the result
 *         of {@code unassignedDouble} on the unassigned path; {@code false} otherwise
 */
private final boolean getTrailDouble(CharBuffer source, ByteBuffer target, int uniMask,
    SideEffectsDouble x, boolean flush, CoderResult[] cr) {
  if (x.sourceArrayIndex >= source.limit()) {
    /* no more input */
    return false;
  }
  /* test the following code unit */
  final char trail = source.get(x.sourceArrayIndex);
  if (!UTF16.isTrailSurrogate(trail)) {
    /* unmatched lead code unit (1st surrogate): callback(illegal) */
    cr[0] = CoderResult.malformedForLength(1);
    return false;
  }
  ++x.sourceArrayIndex;
  ++x.nextSourceIndex;
  /* convert this supplementary code point */
  x.c = UCharacter.getCodePoint((char) x.c, trail);
  if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) != 0) {
    x.doread = false;
    return true;
  }
  /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
  /* callback(unassigned) */
  x.doread = true;
  return unassignedDouble(source, target, x, flush, cr);
}

代码示例来源:origin: com.ibm.icu/icu4j-charset

if (UTF16.isTrailSurrogate(trail)) {
  ++x.sourceArrayIndex;
  ++x.nextSourceIndex;

代码示例来源:origin: io.virtdata/virtdata-lib-realer

/**
 * Returns the code point at index and advances past it (post-increment semantics). When
 * index is on a valid surrogate pair, the iterator moves past both units and the combined
 * code point is returned. Otherwise the behavior is the same as <code>next()</code>.
 *
 * @return the next codepoint in text, or DONE if the index is at the limit of the text.
 * @stable ICU 2.4
 */
@Override
public int nextCodePoint() {
  final int first = next();
  if (!UTF16.isLeadSurrogate((char) first)) {
    return first;
  }
  final int second = next();
  if (UTF16.isTrailSurrogate((char) second)) {
    return Character.toCodePoint((char) first, (char) second);
  }
  if (second != DONE) {
    // unmatched lead surrogate: un-read the unit that followed it
    previous();
  }
  return first;
}

代码示例来源:origin: io.virtdata/virtdata-lib-realer

/**
 * Moves back to the start of the previous code point and returns it (pre-decrement
 * semantics). When the index is preceded by a valid surrogate pair, the iterator moves to
 * the start of the pair and the combined code point is returned. Otherwise the behavior is
 * the same as <code>previous()</code>.
 *
 * @return the previous code point in the text, or DONE if the new index is before the start of the text.
 * @stable ICU 2.4
 */
public int previousCodePoint() {
  final int last = previous();
  if (!UTF16.isTrailSurrogate((char) last)) {
    return last;
  }
  final int before = previous();
  if (UTF16.isLeadSurrogate((char) before)) {
    return Character.toCodePoint((char) before, (char) last);
  }
  if (before != DONE) {
    // unmatched trail surrogate: step forward again over the unit just read
    next();
  }
  return last;
}

代码示例来源:origin: io.virtdata/virtdata-lib-realer

/**
 * Completes a code point whose first code unit, {@code lead}, has already been read from
 * {@code ci}.
 *
 * <p>If {@code lead} is not above {@code UTF16.LEAD_SURROGATE_MAX_VALUE}, the next unit is
 * read; a trail surrogate is combined with {@code lead} into a supplementary code point,
 * anything else is un-read with {@code ci.previous()}.
 *
 * <p>NOTE(review): there is no lower-bound check on {@code lead}; presumably the caller
 * only passes values at or above the lead surrogate minimum - confirm.
 *
 * @param ci   iterator positioned on the unit that {@code lead} was read from
 * @param lead the code unit already read
 * @return {@code DONE32} if {@code lead} is {@code CharacterIterator.DONE} and the iterator
 *         is at or past its end index; the combined code point for a surrogate pair;
 *         otherwise {@code lead} unchanged
 */
public static int nextTrail32(CharacterIterator ci, int lead) {
  if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
    return DONE32;
  }
  if (lead > UTF16.LEAD_SURROGATE_MAX_VALUE) {
    return lead;
  }
  final char following = ci.next();
  if (!UTF16.isTrailSurrogate(following)) {
    // not a pair: leave the iterator where it was on entry
    ci.previous();
    return lead;
  }
  return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
      + (following - UTF16.TRAIL_SURROGATE_MIN_VALUE)
      + UTF16.SUPPLEMENTARY_MIN_VALUE;
}

代码示例来源:origin: io.virtdata/virtdata-lib-realer

/**
 * Returns the code point at the current index. A lead surrogate followed by a trail
 * surrogate yields the combined code point; otherwise the value of {@code current()} is
 * returned. The iterator is stepped forward and back again while peeking.
 * @return current codepoint
 */
@Override
public int currentCodePoint(){
  // charAt is not used here because it behaves differently when the
  // index points at a trail surrogate, so surrogates are checked by hand
  final int first = current();
  if(!UTF16.isLeadSurrogate((char)first)){
    return first;
  }
  // peek at the unit after the lead: advance, read, then back off again
  next();
  final int second = current();
  previous();
  if(UTF16.isTrailSurrogate((char)second)){
    // we found a surrogate pair
    return Character.toCodePoint((char)first, (char)second);
  }
  return first;
}

代码示例来源:origin: io.virtdata/virtdata-lib-realer

/**
 * Returns the codepoint at the current index. If the current index is invalid, DONE is returned. If the current
 * index points to a lead surrogate that is followed by a trail surrogate, the combined code point is returned.
 * Otherwise, the code unit at index is returned. The iterator is stepped forward and back again while peeking.
 *
 * @return current codepoint
 * @stable ICU 2.4
 */
public int currentCodePoint() {
  final int unit = current();
  if (UTF16.isLeadSurrogate((char) unit)) {
    // step forward so that current() yields the unit after the lead,
    // then step back because this method must not move the index
    next();
    final int following = current();
    previous();
    final boolean isPair = UTF16.isTrailSurrogate((char) following);
    if (isPair) {
      return Character.toCodePoint((char) unit, (char) following);
    }
  }
  return unit;
}

代码示例来源:origin: io.virtdata/virtdata-lib-realer

/**
 * Returns the code point at the iterator's current index.
 *
 * <p>Values below {@code UTF16.LEAD_SURROGATE_MIN_VALUE} are returned directly. For a lead
 * surrogate, the next unit is read and then un-read with {@code ci.previous()}; if it is a
 * trail surrogate the combined supplementary code point is returned. For other values,
 * {@code DONE32} is returned when the unit equals {@code CharacterIterator.DONE} and the
 * index is at or past the end index.
 *
 * @param ci the iterator to read from
 * @return the code point at the current index, the single code unit when no pair is found,
 *         or {@code DONE32} at the end of the text
 */
public static int current32(CharacterIterator ci) {
    final char lead = ci.current();
    if (lead < UTF16.LEAD_SURROGATE_MIN_VALUE) {
      return lead;
    }
    if (!UTF16.isLeadSurrogate(lead)) {
      // DONE is also a legal char value, so the index decides whether this is the end
      if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
        return DONE32;
      }
      return lead;
    }
    // peek at the unit after the lead, then step back onto the lead
    final char trail = ci.next();
    ci.previous();
    if (!UTF16.isTrailSurrogate(trail)) {
      return lead;
    }
    return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
        + (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
        + UTF16.SUPPLEMENTARY_MIN_VALUE;
}
}

代码示例来源:origin: io.virtdata/virtdata-lib-realer

/**
 * Moves the iterator back by one code point and returns it.
 *
 * <p>If the unit stepped onto is a trail surrogate and the iterator is not at its begin
 * index, the unit before it is read as well: a lead surrogate is combined with the trail
 * into a supplementary code point, anything else is un-read with {@code ci.next()}.
 *
 * @param ci the iterator to move
 * @return {@code DONE32} if the iterator is already at or before its begin index; the
 *         combined code point for a surrogate pair; otherwise the single code unit
 */
public static int previous32(CharacterIterator ci) {
  if (ci.getIndex() <= ci.getBeginIndex()) {
    return DONE32;
  }
  final char trail = ci.previous();
  final boolean mayHaveLead =
      UTF16.isTrailSurrogate(trail) && ci.getIndex() > ci.getBeginIndex();
  if (!mayHaveLead) {
    return trail;
  }
  final char lead = ci.previous();
  if (UTF16.isLeadSurrogate(lead)) {
    return (((int) lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
        + ((int) trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
        + UTF16.SUPPLEMENTARY_MIN_VALUE;
  }
  // unmatched trail surrogate: step forward again onto the trail
  ci.next();
  return trail;
}

相关文章