本文整理了Java中zemberek.core.logging.Log
类的一些代码示例,展示了Log
类的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Log
类的具体详情如下:
包路径:zemberek.core.logging.Log
类名称:Log
[英]A convenient Log class.
[中]一个方便的日志类。
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Demonstrates sentence boundary detection on a short Turkish paragraph that
 * contains abbreviations ("Prof. Dr."), a decimal number and an ellipsis,
 * then logs every sentence the default extractor finds.
 */
public static void simpleSentenceBoundaryDetector() {
  String paragraph =
      "Prof. Dr. Veli Davul açıklama yaptı. Kimse %6.5 lik enflasyon oranını beğenmemiş!" +
      " Oysa maçta ikinci olmuştuk... Değil mi?";
  Log.info("Paragraph = " + paragraph);
  TurkishSentenceExtractor extractor = TurkishSentenceExtractor.DEFAULT;
  List<String> sentences = extractor.fromParagraph(paragraph);
  Log.info("Sentences:");
  sentences.forEach(sentence -> Log.info(sentence));
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Attaches a condition to this builder. If a condition was already assigned,
 * a warning is logged and the new value overwrites the old one anyway.
 *
 * @param _condition the condition to set
 * @return this builder, to allow call chaining
 */
public Builder setCondition(Condition _condition) {
  if (this.condition != null) {
    Log.warn("Condition was already set.");
  }
  this.condition = _condition;
  return this;
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Warms the static analysis cache asynchronously with the most frequently
 * used words, analyzing each one through {@code analysisProvider}.
 *
 * <p>The flag {@code staticCacheInitialized} is set as soon as the worker
 * thread is started (not when it finishes); together with {@code synchronized}
 * this guarantees the cache is only ever populated once.
 *
 * @param analysisProvider function producing the analysis to cache per word
 */
public synchronized void initializeStaticCache(Function<String, WordAnalysis> analysisProvider) {
  if (staticCacheDisabled || staticCacheInitialized) {
    return;
  }
  new Thread(() -> {
    try {
      Stopwatch stopwatch = Stopwatch.createStarted();
      List<String> words = TextIO.loadLinesFromResource(MOST_USED_WORDS_FILE);
      Log.debug("File read in %d ms.", stopwatch.elapsed(TimeUnit.MILLISECONDS));
      // Cache at most STATIC_CACHE_CAPACITY of the most frequent words.
      int size = Math.min(STATIC_CACHE_CAPACITY, words.size());
      for (int i = 0; i < size; i++) {
        String word = words.get(i);
        staticCache.put(word, analysisProvider.apply(word));
      }
      Log.debug("Static cache initialized with %d most frequent words", size);
      Log.debug("Initialization time: %d ms.", stopwatch.elapsed(TimeUnit.MILLISECONDS));
    } catch (IOException e) {
      // Route the failure detail through the project logger instead of
      // printStackTrace(), so it lands in the same sink as the rest of the logs.
      Log.error("Could not read most frequent words list, static cache is disabled. Reason: "
          + e.getMessage());
    }
  }).start();
  staticCacheInitialized = true;
}
代码示例来源:origin: ahmetaa/zemberek-nlp
Log.info("Loading from: " + idFile);
if (!idFile.exists()) {
Log.warn("Cannot find n-gram id file " + idFile.getAbsolutePath());
continue;
Log.warn("Cannot load key file as flat array. Too much index values.");
continue;
代码示例来源:origin: ahmetaa/zemberek-nlp
Log.debug("Changing log base from " + DEFAULT_LOG_BASE + " to " + logBase);
changeLogBase(logBase);
this.stupidBackoffLogAlpha = (float) (Math.log(stupidBackoffAlpha) / Math.log(logBase));
Log.debug("Applying unigram smoothing with unigram weight: " + unigramWeight);
applyUnigramSmoothing(unigramWeight);
Log.debug("Lm will use stupid back off with alpha value: " + stupidBackoffAlpha);
Log.warn("Ngram id file directory %s does not exist. Continue without loading.",
ngramKeyFileDir);
} else {
Log.info("Loading actual n-gram id data.");
this.ngramIds = new NgramIds(this.order, ngramKeyFileDir, mphfs);
代码示例来源:origin: ahmetaa/zemberek-nlp
File tmp = Files.createTempDir();
Splitter splitter = new Splitter(file, tmp, chunkBits);
Log.info("Gram count: " + splitter.gramCount);
Log.info("Segment count: " + splitter.pageCount);
Log.info("Average segment size: " + (1 << splitter.pageBit));
Log.info("Segmenting File...");
splitter.split();
int bucketBits = splitter.pageBit - 2;
for (int i = 0; i < splitter.pageCount; i++) {
final ByteGramProvider keySegment = splitter.getKeySegment(i);
Log.debug("Segment key count: " + keySegment.keyAmount());
Log.debug("Segment bucket ratio: " + ((double) keySegment.keyAmount() / (1 << bucketBits)));
total += keySegment.keyAmount();
MultiLevelMphf mphf = MultiLevelMphf.generate(keySegment);
Log.info("MPHF is generated for segment %d with %d keys. Average bits per key: %.3f",
i,
mphf.size(),
Log.debug("Total processed keys:" + total);
int maxMask = (1 << splitter.maxBit) - 1;
int bucketMask = (1 << bucketBits) - 1;
代码示例来源:origin: ahmetaa/zemberek-nlp
for (String word : inputVocabulary) {
if (vocabularyIndexMap.containsKey(word)) {
Log.warn("Language model vocabulary has duplicate item: " + word);
continue;
Log.warn(
"Unknown word was already defined as %s but another matching token exist in the input vocabulary: %s",
unknownWord, word);
Log.warn(
"Sentence start index was already defined as %s but another matching token exist in the input vocabulary: %s",
sentenceStart, word);
Log.warn(
"Sentence end index was already defined as %s but another matching token exist in the input vocabulary: %s",
sentenceEnd, word);
cleanVocab.add(unknownWord);
vocabularyIndexMap.put(unknownWord, indexCounter++);
Log.debug("Necessary special token " + unknownWord
+ " was not found in the vocabulary, it is added explicitly");
cleanVocab.add(sentenceStart);
vocabularyIndexMap.put(sentenceStart, indexCounter++);
Log.debug("Vocabulary does not contain sentence start token, it is added explicitly.");
cleanVocab.add(sentenceEnd);
vocabularyIndexMap.put(sentenceEnd, indexCounter);
Log.debug("Vocabulary does not contain sentence end token, it is added explicitly.");
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Builds a quantizer from a binary file of 32-bit floats by feeding every
 * value into a lookup calculator with the requested bit width.
 *
 * @param file binary file whose payload is a sequence of floats, preceded by
 *     a 4-byte count header
 * @param bitCount quantization bit width for the lookup calculator
 * @param quantizerType which quantizer implementation to produce
 * @return the quantizer computed over all values in the file
 * @throws IOException if the file cannot be read
 */
public static Quantizer getQuantizer(File file, int bitCount, QuantizerType quantizerType)
    throws IOException {
  BinaryFloatFileReader reader = new BinaryFloatFileReader(file);
  try (DataInputStream in = reader.getStream()) {
    in.skipBytes(4); // skip the count header; the reader already knows the count
    LookupCalculator calculator = new LookupCalculator(bitCount);
    for (int index = 0; index < reader.count; index++) {
      calculator.add(in.readFloat());
      // Progress trace every 500k values when debug logging is on.
      if (Log.isDebug() && index % 500000 == 0) {
        Log.debug("Values added to value histogram = %d", index);
      }
    }
    return calculator.getQuantizer(quantizerType);
  }
}
代码示例来源:origin: ahmetaa/zemberek-nlp
// Handler invoked by the JVM when a thread terminates due to an uncaught throwable.
@Override
public void uncaughtException(Thread t, Throwable e) {
// NOTE(review): the Throwable is passed to Log.error as an extra argument; if
// this Log class formats via String.format the stack trace may be silently
// dropped, making printStackTrace() below the only place the full trace is
// printed. Confirm Log.error's Throwable handling before removing either line.
Log.error("Exception occurred in thread :" + t.getName(), e);
e.printStackTrace();
}
}
代码示例来源:origin: ahmetaa/zemberek-nlp
Log.info("Generating models for:" + Arrays.toString(languages));
modelFileMap.remove(l);
} else {
Log.warn("Cannot find count model file for language " + language);
Log.info("Generating garbage model from remaining count models.");
CharNgramCountModel garbageModel = new CharNgramCountModel("unk", order);
for (File file : modelFileMap.values()) {
代码示例来源:origin: ahmetaa/zemberek-nlp
if (Log.isDebug()) {
Log.debug("Failed key Count:%d " , failedKeyCount);
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Demonstrates simple tokenization: splits a short Turkish sentence into
 * token strings with the default tokenizer and logs them joined by '|'.
 */
public static void simpleTokenization() {
  Log.info("Simple tokenization returns a list of token strings.");
  TurkishTokenizer tokenizer = TurkishTokenizer.DEFAULT;
  String input = "İstanbul'a, merhaba!";
  Log.info("Input = " + input);
  // Tokenize the `input` variable instead of repeating the literal, so the
  // logged input and the tokenized text cannot drift apart.
  Log.info("Tokenization list = " +
      Joiner.on("|").join(tokenizer.tokenizeToStrings(input)));
}
代码示例来源:origin: ahmetaa/zemberek-nlp
Log.info("Amount of words to include using include file: %d", wordsToInclude.size());
Set<String> wordsToExclude = getWordsFromFile(excludeFile);
Log.info("Amount of words to exclude using exclude file: %d", wordsToExclude.size());
Log.warn(
"There are matching words in both include and exclude files: " + intersection.toString());
Log.info("Processing corpus: %s", corpus);
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(new FileInputStream(corpus), "utf-8"))) {
Log.info("%d lines processed. Vocabulary Size: %d", count, histogram.size());
Log.info("A total of %d lines have been processed. Vocabulary Size: %d", count,
histogram.size());
top = histogram.size();
} else {
Log.info("Top %d words will be used.", top);
Log.info("Coverage: %.3f",
100d * ((double) histogram.totalCount(mostFrequent)) / histogram.totalCount());
Log.info("Total size of vocabulary: %d", result.size());
if (ordered) {
Log.info("Sorting file with word order.");
result.sort(collator);
Log.info("Saving to vocabulary file: %s", outFile);
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Registers one or more incoming transitions on this state. A transition that
 * is already registered is logged as a warning but added again regardless.
 *
 * @param suffixTransitions transitions to register
 * @return this state, to allow call chaining
 */
public MorphemeState addIncoming(MorphemeTransition... suffixTransitions) {
  for (MorphemeTransition transition : suffixTransitions) {
    if (incoming.contains(transition)) {
      Log.warn("Incoming transition %s already exist in %s", transition, this);
    }
    incoming.add(transition);
  }
  return this;
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Creates a converter that generates a multi-file uncompressed language model
 * from an ARPA file located in the given directory.
 *
 * @param dir directory containing the ARPA input
 * @param fractionDigitCount number of fraction digits to keep when scaling
 *     probabilities; a negative value disables scaling (multiplier = 0)
 * @throws FileNotFoundException if the input cannot be located
 */
ArpaToBinaryConverter(File dir, int fractionDigitCount)
    throws FileNotFoundException {
  Log.info("Generating multi file uncompressed language model from Arpa file in directory: %s",
      dir.getAbsolutePath());
  this.dir = dir;
  // Negative digit count means "no fraction scaling".
  fractionMultiplier = fractionDigitCount >= 0 ? Math.pow(10, fractionDigitCount) : 0;
  start = System.currentTimeMillis();
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Counts word and label tokens in each given file and logs a per-file summary.
 * Tokens containing "__label__" are counted as labels; a label that also
 * contains '-' triggers a warning with the offending line. All other tokens
 * are counted as words.
 *
 * @param paths text files to analyze, one token histogram pair per file
 * @throws IOException if any file cannot be read
 */
static void countTokens(Path... paths) throws IOException {
  for (Path path : paths) {
    List<String> lines = TextIO.loadLines(path);
    Histogram<String> wordCounts = new Histogram<>();
    Histogram<String> labelCounts = new Histogram<>();
    for (String line : lines) {
      for (String token : line.split("[\\s]+")) {
        if (!token.contains("__label__")) {
          wordCounts.add(token);
          continue;
        }
        // Labels with '-' are suspicious; surface the whole line.
        if (token.contains("-")) {
          Log.warn(line);
        }
        labelCounts.add(token);
      }
    }
    Log.info("There are %d lines, %d words, %d labels in %s",
        lines.size(),
        wordCounts.size(),
        labelCounts.size(),
        path);
  }
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Registers one or more outgoing transitions on this state. A transition that
 * is already registered is logged as a warning but added again regardless.
 *
 * @param suffixTransitions transitions to register
 * @return this state, to allow call chaining
 */
public MorphemeState addOutgoing(MorphemeTransition... suffixTransitions) {
  for (MorphemeTransition transition : suffixTransitions) {
    if (outgoing.contains(transition)) {
      Log.warn("Outgoing transition %s already exist in %s", transition, this);
    }
    outgoing.add(transition);
  }
  return this;
}
代码示例来源:origin: ahmetaa/zemberek-nlp
/** Logs the number of sentences and the total token count across all of them. */
void info() {
  int tokenTotal = 0;
  for (SentenceAnalysis sentence : sentences) {
    tokenTotal += sentence.size();
  }
  Log.info("There are %d sentences and %d tokens.", sentences.size(), tokenTotal);
}
代码示例来源:origin: ahmetaa/zemberek-nlp
Log.info("Processing.. %d found.", batchResult.acceptedSentences.size());
for (String sentence : toProcess) {
Log.warn("Sentence [%s] contains word(s) that all analyses are ignored.",
r.sentence);
continue;
代码示例来源:origin: ahmetaa/zemberek-nlp
/**
 * Returns the word at the given vocabulary index.
 *
 * @param index vocabulary index. If it is out of bounds, the unknown-word
 *     token ({@code <UNK>}) is returned and a warning is logged. Note that
 *     the vocabulary may contain the {@code <UNK>} token as well.
 */
public String getWord(int index) {
  if (index >= 0 && index < vocabulary.size()) {
    return vocabulary.get(index);
  }
  Log.warn("Out of bounds word index is used:" + index);
  return unknownWord;
}
内容来源于网络,如有侵权,请联系作者删除!