class Reline::Unicode
Constants
- CSI_REGEXP
- EscapedChars
- EscapedPairs
- NON_PRINTING_END
- NON_PRINTING_START
- OSC_REGEXP
- WIDTH_SCANNER
Public Class Methods
Source
# File lib/reline/unicode.rb, line 98
# Returns the summed display width of +str+, measured per grapheme cluster
# with get_mbchar_width after converting the string to UTF-8.
#
# When +allow_escape_code+ is true the string is tokenised with
# WIDTH_SCANNER instead: CSI and OSC sequences add no width, and grapheme
# clusters seen between a non-printing start marker and a non-printing end
# marker are skipped.
def self.calculate_width(str, allow_escape_code = false)
  utf8 = str.encode(Encoding::UTF_8)
  return utf8.grapheme_clusters.sum { |cluster| get_mbchar_width(cluster) } unless allow_escape_code

  total = 0
  hidden = false
  utf8.scan(WIDTH_SCANNER) do |non_printing_start, non_printing_end, csi, osc, gc|
    if non_printing_start
      hidden = true
    elsif non_printing_end
      hidden = false
    elsif csi || osc
      # Escape sequences occupy no columns.
    elsif gc
      total += get_mbchar_width(gc) unless hidden
    end
  end
  total
end
Source
# File lib/reline/unicode.rb, line 636
# Returns the longest leading run of grapheme clusters shared by every string
# in +list+, or '' when +list+ is empty.
#
# With +ignore_case: true+ clusters are compared with String#casecmp?; the
# returned text is taken from the first element of +list+.
def self.common_prefix(list, ignore_case: false)
  return '' if list.empty?

  same = ignore_case ? ->(a, b) { a.casecmp?(b) } : ->(a, b) { a == b }
  shared = list.first.grapheme_clusters
  list.each do |item|
    clusters = item.grapheme_clusters
    # Count how many leading positions still agree with this item.
    keep = shared.each_index.take_while { |i| same.call(shared[i], clusters[i]) }.size
    shared = shared.first(keep)
  end
  shared.join
end
Source
# File lib/reline/unicode.rb, line 341
# Locates the two words to be swapped around the cursor.
#
# Returns four byte offsets into +line+:
# [left_word_start, middle_start, right_word_start, after_start].
# Characters are classified with \p{Word} after converting each one to UTF-8.
def self.ed_transpose_words(line, byte_pointer)
  word = /\p{Word}/
  non_word = /\P{Word}/
  eol = line.bytesize

  # Moves left from +pos+ until the character just before it matches +stop+.
  rewind = lambda do |pos, stop|
    while pos > 0
      width = get_prev_mbchar_size(line, pos)
      break if line.byteslice(pos - width, width).encode(Encoding::UTF_8) =~ stop
      pos -= width
    end
    pos
  end

  # Moves right from +pos+ while it is below +limit+ and the character at
  # +pos+ does not match +stop+.
  advance = lambda do |pos, stop, limit|
    while limit > pos
      width = get_next_mbchar_size(line, pos)
      break if line.byteslice(pos, width).encode(Encoding::UTF_8) =~ stop
      pos += width
    end
    pos
  end

  cursor_size = get_next_mbchar_size(line, byte_pointer)
  cursor_char = line.byteslice(byte_pointer, cursor_size)

  if cursor_size.zero?
    # ' aaa bbb [cursor]'
    right_word_start = rewind.call(rewind.call(byte_pointer, word), non_word)
    after_start = advance.call(byte_pointer, non_word, eol)
  elsif cursor_char.encode(Encoding::UTF_8) =~ word
    # ' aaa bb[cursor]b'
    right_word_start = rewind.call(byte_pointer, non_word)
    after_start = advance.call(byte_pointer, non_word, eol)
  else
    probe = advance.call(byte_pointer, word, eol - 1)
    if probe == eol - 1
      # ' aaa bbb [cursor] '
      after_start = eol
      right_word_start = rewind.call(rewind.call(probe, word), non_word)
    else
      # ' aaa [cursor] bbb '
      right_word_start = probe
      after_start = advance.call(probe, non_word, eol)
    end
  end

  middle_start = rewind.call(right_word_start, word)
  left_word_start = rewind.call(middle_start, non_word)
  [left_word_start, middle_start, right_word_start, after_start]
end
Source
# File lib/reline/unicode.rb, line 307
# Returns how many bytes lie between +byte_pointer+ and the start of the word
# to its left: first any characters that do not match \p{Word} are passed,
# then characters are passed until one matches \P{Word}.
def self.em_backward_word(line, byte_pointer)
  pos = byte_pointer
  # Phase 1 stops at a word character, phase 2 at a non-word character.
  [/\p{Word}/, /\P{Word}/].each do |stop|
    while pos > 0
      width = get_prev_mbchar_size(line, pos)
      break if line.byteslice(pos - width, width).encode(Encoding::UTF_8) =~ stop
      pos -= width
    end
  end
  byte_pointer - pos
end
Source
# File lib/reline/unicode.rb, line 324
# Returns how many bytes lie between +byte_pointer+ and the start of the
# whitespace-delimited word to its left: whitespace is passed first, then
# characters are passed until one matches \s.
def self.em_big_backward_word(line, byte_pointer)
  pos = byte_pointer
  # Phase 1 stops at a non-space character, phase 2 at a space character.
  [/\S/, /\s/].each do |stop|
    while pos > 0
      width = get_prev_mbchar_size(line, pos)
      break if line.byteslice(pos - width, width) =~ stop
      pos -= width
    end
  end
  byte_pointer - pos
end
Source
# File lib/reline/unicode.rb, line 264
# Returns how many bytes lie between +byte_pointer+ and the end of the next
# word: characters not matching \p{Word} are passed first, then characters
# are passed until one matches \P{Word} or the line ends.
def self.em_forward_word(line, byte_pointer)
  pos = byte_pointer
  # Phase 1 stops at a word character, phase 2 at a non-word character.
  [/\p{Word}/, /\P{Word}/].each do |stop|
    while line.bytesize > pos
      width = get_next_mbchar_size(line, pos)
      break if line.byteslice(pos, width).encode(Encoding::UTF_8) =~ stop
      pos += width
    end
  end
  pos - byte_pointer
end
Source
# File lib/reline/unicode.rb, line 281
# Scans forward over the next word like em_forward_word and also builds its
# capitalised form.
#
# Returns [byte_size, new_str]: the number of bytes covered, and the covered
# text with leading non-word characters copied unchanged, the first word
# character upcased and the remaining word characters downcased.
def self.em_forward_word_with_capitalization(line, byte_pointer)
  pos = byte_pointer
  result = String.new

  # Leading non-word characters are kept verbatim.
  while line.bytesize > pos
    width = get_next_mbchar_size(line, pos)
    char = line.byteslice(pos, width)
    break if char.encode(Encoding::UTF_8) =~ /\p{Word}/
    result += char
    pos += width
  end

  at_word_head = true
  while line.bytesize > pos
    width = get_next_mbchar_size(line, pos)
    char = line.byteslice(pos, width)
    break if char.encode(Encoding::UTF_8) =~ /\P{Word}/
    result += at_word_head ? char.upcase : char.downcase
    at_word_head = false
    pos += width
  end

  [pos - byte_pointer, result]
end
Source
# File lib/reline/unicode.rb, line 44
# Returns a copy of +str+ prepared for printing: newlines are kept, tabs are
# replaced by a fixed string, and any other character whose code point has an
# entry in EscapedPairs is replaced by that entry.
def self.escape_for_print(str)
  escaped = str.each_char.map do |char|
    if char == "\n"
      char
    elsif char == "\t"
      -' '
    else
      EscapedPairs[char.ord] || char
    end
  end
  escaped.join
end
Source
# File lib/reline/unicode.rb, line 75
# Returns the display width of a single character / grapheme cluster.
#
# Code points up to 0x1F report 2 and code points up to 0x7E report 1.
# Everything else is looked up in the EastAsianWidth tables by the first code
# point of the UTF-8 form; a table value of -1 yields Reline.ambiguous_width.
def self.get_mbchar_width(mbchar)
  codepoint = mbchar.ord
  return 2 if codepoint <= 0x1F # in EscapedPairs
  return 1 if codepoint <= 0x7E # printable ASCII chars

  utf8 = mbchar.encode(Encoding::UTF_8)
  codepoint = utf8.ord
  chunk = EastAsianWidth::CHUNK_LAST.bsearch_index { |last| codepoint <= last }
  width = EastAsianWidth::CHUNK_WIDTH[chunk]
  return Reline.ambiguous_width if width == -1
  return width unless width == 1 && utf8.size >= 2

  # Halfwidth Dakuten / Handakuten.
  # Only these two characters have the Letter Modifier category and can be
  # combined into a single grapheme cluster.
  follower = utf8[1].ord
  [0xFF9E, 0xFF9F].include?(follower) ? 2 : 1
end
Source
# File lib/reline/unicode.rb, line 250
# Returns the byte size of the grapheme cluster that starts at +byte_pointer+
# in +line+, or 0 when there is nothing at or after that offset.
#
# Only the first cluster is segmented (lazy enumerator) instead of splitting
# the whole remainder of the line, and an offset past the end of the line
# yields 0 rather than raising on the nil returned by String#byteslice.
def self.get_next_mbchar_size(line, byte_pointer)
  rest = line.byteslice(byte_pointer..-1)
  return 0 unless rest # byte_pointer lies beyond the end of the line

  first_cluster = rest.each_grapheme_cluster.first
  first_cluster ? first_cluster.bytesize : 0
end
Source
# File lib/reline/unicode.rb, line 255
# Returns the byte size of the grapheme cluster that ends right before
# +byte_pointer+ in +line+, or 0 when +byte_pointer+ is 0.
def self.get_prev_mbchar_size(line, byte_pointer)
  return 0 if byte_pointer.zero?

  last_cluster = line.byteslice(0...byte_pointer).grapheme_clusters.last
  last_cluster ? last_cluster.bytesize : 0
end
Source
# File lib/reline/unicode.rb, line 57
# Converts +str+ to +encoding+, replacing invalid and undefined characters.
#
# Reline only supports strings that are convertible to UTF-8, so when neither
# side is UTF-8 and the result is not pure ASCII, each grapheme cluster of the
# result that cannot be converted to UTF-8 is replaced with '?'.
def self.safe_encode(str, encoding)
  converted = str.encode(encoding, invalid: :replace, undef: :replace)
  utf8 = Encoding::UTF_8
  return converted if [str.encoding, converted.encoding].include?(utf8) || converted.ascii_only?

  # Essentially the same as
  # `str.encode(utf8, **replace_options).encode(encoding, **replace_options)`
  # but without the unnecessary, irreversible round trip through UTF-8.
  converted.gsub(/\X/) do |cluster|
    begin
      cluster.encode(utf8)
      cluster
    rescue Encoding::UndefinedConversionError
      '?'
    end
  end
end
Source
# File lib/reline/unicode.rb, line 125
# Splits +str+ with split_line_by_width and returns the resulting lines
# together with their count, as [lines, lines.size].
def self.split_by_width(str, max_width)
  rows = split_line_by_width(str, max_width)
  return rows, rows.size
end
This method is used by IRB
Source
# File lib/reline/unicode.rb, line 130
# Splits +str+ into an array of strings so that the measured width of each
# one does not exceed +max_width+; +offset+ is the width already used on the
# first line.
#
# CSI and OSC sequences are copied into the current line without adding
# width. Sequences seen outside non-printing markers are also remembered and
# used as the start of every following line; "\e[m" and "\e[0m" clear what
# has been remembered. Returned strings are created with +encoding+.
def self.split_line_by_width(str, max_width, encoding = str.encoding, offset: 0)
  rows = [String.new(encoding: encoding)]
  carried = String.new(encoding: encoding)
  used = offset
  hidden = false

  str.encode(Encoding::UTF_8).scan(WIDTH_SCANNER) do |non_printing_start, non_printing_end, csi, osc, gc|
    if non_printing_start
      hidden = true
    elsif non_printing_end
      hidden = false
    elsif csi
      rows.last << csi
      next if hidden

      if csi == -"\e[m" || csi == -"\e[0m"
        carried.clear
      else
        carried << csi
      end
    elsif osc
      rows.last << osc
      carried << osc unless hidden
    elsif gc
      unless hidden
        char_width = get_mbchar_width(gc)
        used += char_width
        if used > max_width
          # Wrap: the new line starts with the remembered escape sequences.
          used = char_width
          rows << carried.dup
        end
      end
      rows.last << gc
    end
  end

  # The cursor moves to the next line when the last line is exactly full.
  rows << String.new(encoding: encoding) if used == max_width
  rows
end
Source
# File lib/reline/unicode.rb, line 172
# Returns +prompt+ with each \x01 marker and its closing \x02 removed while
# keeping the text between them; an unclosed \x01 is removed as well.
def self.strip_non_printing_start_end(prompt)
  prompt.gsub(/\x01([^\x02]*)(?:\x02|\z)/, '\1')
end
Source
# File lib/reline/unicode.rb, line 181
# Extracts the part of +str+ that lies in the columns from +start_col+ up to
# +start_col + width+, measuring each grapheme cluster with get_mbchar_width.
#
# CSI and OSC sequences, and text between non-printing markers, are copied
# into the result without counting towards the width.
#
# Options:
# cover_begin:: also take a character that straddles +start_col+.
# cover_end::   also take a character that straddles the end column.
# padding::     put a space in place of a straddling character that is not
#               taken, and pad the result with spaces up to the end column.
#
# Returns [chunk, chunk_start_col, chunk_width].
def self.take_mbchar_range(str, start_col, width, cover_begin: false, cover_end: false, padding: false)
  chunk = String.new(encoding: str.encoding)

  end_col = start_col + width
  total_width = 0
  rest = str.encode(Encoding::UTF_8)
  in_zero_width = false
  chunk_start_col = nil
  chunk_end_col = nil
  has_csi = false
  rest.scan(WIDTH_SCANNER) do |non_printing_start, non_printing_end, csi, osc, gc|
    case
    when non_printing_start
      in_zero_width = true
    when non_printing_end
      in_zero_width = false
    when csi
      # Remember that a CSI sequence was seen so the trailing padding can be
      # preceded by a reset sequence.
      has_csi = true
      chunk << csi
    when osc
      chunk << osc
    when gc
      if in_zero_width
        # Text inside non-printing markers is copied but takes no columns.
        chunk << gc
        next
      end

      mbchar_width = get_mbchar_width(gc)
      # prev_width / total_width are the columns where this character starts
      # and ends.
      prev_width = total_width
      total_width += mbchar_width

      if (cover_begin || padding ? total_width <= start_col : prev_width < start_col)
        # Current character hasn't reached start_col yet
        next
      elsif padding && !cover_begin && prev_width < start_col && start_col < total_width
        # Add preceding padding. This padding might have background color.
        chunk << ' '
        chunk_start_col ||= start_col
        chunk_end_col = total_width
        next
      elsif (cover_end ? prev_width < end_col : total_width <= end_col)
        # Current character is in the range
        chunk << gc
        chunk_start_col ||= prev_width
        chunk_end_col = total_width
        break if total_width >= end_col
      else
        # Current character exceeds end_col
        if padding && end_col < total_width
          # Add succeeding padding. This padding might have background color.
          chunk << ' '
          chunk_start_col ||= prev_width
          chunk_end_col = end_col
        end
        break
      end
    end
  end
  # Nothing was taken: report an empty range positioned at start_col.
  chunk_start_col ||= start_col
  chunk_end_col ||= start_col
  if padding && chunk_end_col < end_col
    # Append padding. This padding should not include background color.
    chunk << "\e[0m" if has_csi
    chunk << ' ' * (end_col - chunk_end_col)
    chunk_end_col = end_col
  end
  [chunk, chunk_start_col, chunk_end_col - chunk_start_col]
end
Source
# File lib/reline/unicode.rb, line 177
# Returns only the chunk (first element) of what take_mbchar_range produces
# for +str+, +start_col+ and +max_width+ with its default options.
def self.take_range(str, start_col, max_width)
  chunk, = take_mbchar_range(str, start_col, max_width)
  chunk
end
Takes a chunk of a String, cut by display width, keeping its escape sequences.
Source
# File lib/reline/unicode.rb, line 607
# Returns how many bytes lie between +byte_pointer+ and the start of the
# previous vi word.
#
# Whitespace before the cursor is passed first. The first non-space character
# found decides the run kind: a \w character continues until a \W character,
# any other character continues until a \w or \s character.
def self.vi_backward_word(line, byte_pointer)
  pos = byte_pointer
  stop = nil

  while pos > 0
    width = get_prev_mbchar_size(line, pos)
    char = line.byteslice(pos - width, width)
    if char =~ /\S/
      stop = char =~ /\w/ ? /\W/ : /[\w\s]/
      break
    end
    pos -= width
  end

  while pos > 0
    width = get_prev_mbchar_size(line, pos)
    char = line.byteslice(pos - width, width)
    break if stop && char =~ stop
    pos -= width
  end

  byte_pointer - pos
end
Source
# File lib/reline/unicode.rb, line 483
# Returns how many bytes lie between +byte_pointer+ and the start of the
# previous whitespace-delimited word: whitespace before the cursor is passed,
# then everything up to the preceding whitespace character.
def self.vi_big_backward_word(line, byte_pointer)
  pos = byte_pointer

  # Steps left while the preceding character does not match +stop+.
  step_back_until = lambda do |stop|
    while pos > 0
      width = get_prev_mbchar_size(line, pos)
      break if line.byteslice(pos - width, width) =~ stop
      pos -= width
    end
  end

  step_back_until.call(/\S/)
  step_back_until.call(/\s/)
  byte_pointer - pos
end
Source
# File lib/reline/unicode.rb, line 459
# Returns how many bytes lie between +byte_pointer+ and the last character of
# the next whitespace-delimited word. Returns 0 when +byte_pointer+ is not
# before the last byte of +line+.
def self.vi_big_forward_end_word(line, byte_pointer)
  last_byte = line.bytesize - 1
  return 0 unless last_byte > byte_pointer

  # Always step over the character under the cursor first.
  pos = byte_pointer + get_next_mbchar_size(line, byte_pointer)

  # Pass whitespace.
  while last_byte > pos
    width = get_next_mbchar_size(line, pos)
    break if line.byteslice(pos, width) =~ /\S/
    pos += width
  end

  # Walk the word, remembering where its most recent character started.
  word_end = pos
  while line.bytesize > pos
    width = get_next_mbchar_size(line, pos)
    break if line.byteslice(pos, width) =~ /\s/
    word_end = pos
    pos += width
  end

  word_end - byte_pointer
end
Source
# File lib/reline/unicode.rb, line 442
# Returns how many bytes lie between +byte_pointer+ and the start of the next
# whitespace-delimited word: non-space characters are passed first, then
# whitespace. Scanning never moves past the last byte of +line+.
def self.vi_big_forward_word(line, byte_pointer)
  last_byte = line.bytesize - 1
  pos = byte_pointer
  # Phase 1 stops at a space character, phase 2 at a non-space character.
  [/\s/, /\S/].each do |stop|
    while last_byte > pos
      width = get_next_mbchar_size(line, pos)
      break if line.byteslice(pos, width) =~ stop
      pos += width
    end
  end
  pos - byte_pointer
end
Source
# File lib/reline/unicode.rb, line 649
# Returns the byte offset of the first non-whitespace character of +line+.
# Scanning stops at the last byte, so a line consisting only of whitespace
# yields the offset reached there.
def self.vi_first_print(line)
  offset = 0
  last_byte = line.bytesize - 1
  until offset >= last_byte
    width = get_next_mbchar_size(line, offset)
    break if line.byteslice(offset, width) =~ /\S/
    offset += width
  end
  offset
end
Source
# File lib/reline/unicode.rb, line 538
# Returns how many bytes lie between +byte_pointer+ and the last character of
# the current or next vi word. Returns 0 when +byte_pointer+ is not before the
# last byte of +line+.
#
# Characters are classified as :word (\w), :space (\s) or
# :non_word_printable (anything else). The character under the cursor and the
# one after it decide where scanning starts and which kind of run is followed.
def self.vi_forward_end_word(line, byte_pointer)
  last_byte = line.bytesize - 1
  return 0 unless last_byte > byte_pointer

  classify = lambda do |char|
    if char =~ /\w/
      :word
    elsif char =~ /\s/
      :space
    else
      :non_word_printable
    end
  end

  first_size = get_next_mbchar_size(line, byte_pointer)
  started_by = classify.call(line.byteslice(byte_pointer, first_size))
  pos = byte_pointer + first_size
  return first_size unless last_byte > pos

  second_size = get_next_mbchar_size(line, pos)
  second = classify.call(line.byteslice(pos, second_size))

  if second == :space
    # Pass the whitespace and pick up the kind of the run that follows it.
    pos += second_size
    while last_byte > pos
      width = get_next_mbchar_size(line, pos)
      char = line.byteslice(pos, width)
      if char =~ /\S/
        started_by = char =~ /\w/ ? :word : :non_word_printable
        break
      end
      pos += width
    end
  else
    # When the kind changes between the first two characters, the second
    # character already starts the run to follow and is not stepped over.
    kind_change = [%i[word non_word_printable], %i[non_word_printable word]]
    pos += second_size unless kind_change.include?([started_by, second])
    started_by = second
  end

  stop = { word: /\W/, non_word_printable: /[\w\s]/ }[started_by]
  run_end = pos
  while line.bytesize > pos
    width = get_next_mbchar_size(line, pos)
    break if stop && line.byteslice(pos, width) =~ stop
    run_end = pos
    pos += width
  end

  run_end - byte_pointer
end
Source
# File lib/reline/unicode.rb, line 500
# Returns how many bytes lie between +byte_pointer+ and the start of the next
# vi word, or 0 when +byte_pointer+ is at or past the end of +line+.
#
# The character under the cursor decides the run to pass: \w characters up to
# a \W, whitespace up to a \S, or other characters up to a \w or \s. Trailing
# whitespace after that run is passed too unless +drop_terminate_spaces+ is
# true.
def self.vi_forward_word(line, byte_pointer, drop_terminate_spaces = false)
  return 0 unless line.bytesize > byte_pointer

  first_size = get_next_mbchar_size(line, byte_pointer)
  first_char = line.byteslice(byte_pointer, first_size)
  stop =
    if first_char =~ /\w/
      /\W/
    elsif first_char =~ /\s/
      /\S/
    else
      /\w|\s/
    end

  pos = byte_pointer + first_size
  while line.bytesize > pos
    width = get_next_mbchar_size(line, pos)
    break if line.byteslice(pos, width) =~ stop
    pos += width
  end
  return pos - byte_pointer if drop_terminate_spaces

  while line.bytesize > pos
    width = get_next_mbchar_size(line, pos)
    break if line.byteslice(pos, width) =~ /\S/
    pos += width
  end
  pos - byte_pointer
end