Java源码示例:org.nlpcn.commons.lang.tire.domain.SmartForest

示例1
/**
 * Binary search over an array of branches, keyed by a character. Adapted
 * from the JDK implementation, but compares against a primitive
 * {@code char} so no auto-boxing takes place.
 *
 * @param branches
 *            branch array to search; assumed to be ordered consistently
 *            with {@code compareTo(char)}
 * @param c
 *            character to look for
 * @return index of the matching branch, or {@code -(insertionPoint + 1)}
 *         when nothing matches (which is {@code -1} for an empty array)
 */
public static <T extends SmartForest<T>> int binarySearch(T[] branches, char c) {
	int lo = 0;
	int hi = branches.length - 1;
	// An empty array skips the loop entirely and yields -(0 + 1) == -1.
	while (lo <= hi) {
		final int probe = (lo + hi) >>> 1;
		final int order = branches[probe].compareTo(c);
		if (order == 0) {
			return probe; // key found
		}
		if (order < 0) {
			lo = probe + 1;
		} else {
			hi = probe - 1;
		}
	}
	return -(lo + 1); // key not found
}
 
示例2
/**
 * Looks up the parameters stored for a word.
 *
 * <p>Every non-null forest in {@code forests} is consulted in order. A forest
 * that does not contain the word (or only contains it as a node whose status
 * is not greater than 1) is skipped and the next forest is tried; previously
 * the search gave up after the first non-null forest.
 *
 * @param word the word to look up, walked character by character
 * @return the parameters of the first forest in which the word ends on a node
 *         with status greater than 1, or {@code null} if no forest has one
 */
public String[] getParams(String word) {
    for (Forest forest : forests) {
        if (forest == null) {
            continue;
        }
        SmartForest<String[]> node = forest;
        // Descend one character at a time; a null node means this forest lacks the word.
        for (int i = 0; node != null && i < word.length(); i++) {
            node = node.get(word.charAt(i));
        }
        if (node != null && node.getStatus() > 1) {
            return node.getParam();
        }
    }
    return null;
}
 
示例3
/**
 * Adds a new word to the tree, or merges it into the entry already stored
 * under the same name.
 *
 * @param newWord the word to register
 */
public void addTerm(NewWord newWord) {
    SmartForest<NewWord> existing = sf.getBranch(newWord.getName());
    if (existing != null && existing.getParam() != null) {
        // Already known: fold the new nature and frequency into the stored entry.
        existing.getParam().update(newWord.getNature(), newWord.getAllFreq());
        return;
    }

    count++;
    if (splitWord != null) {
        newWord.setScore(-splitWord.cohesion(newWord.getName()));
    } else {
        newWord.setScore(-1);
    }

    // Only the insertion itself is guarded by the lock on sf.
    synchronized (sf) {
        sf.add(newWord.getName(), newWord);
    }
}
 
示例4
/**
 * Recursively walks the branches below {@code smartForest} and copies the
 * name and score of every matching word into {@code hm}.
 *
 * <p>A branch contributes its word when its status is 2 or 3, the word is
 * active, and {@code nature} is either null or equal to the word's nature.
 * Branches with status 3 are not descended into; all others are.
 *
 * @param smartForest node whose branches are visited; null or branch-less nodes are ignored
 * @param hm          receives word name to score entries
 * @param nature      nature filter, or null to accept every nature
 */
private void valueResult(SmartForest<NewWord> smartForest, HashMap<String, Double> hm, Nature nature) {
    if (smartForest == null || smartForest.branches == null) {
        return;
    }
    for (SmartForest<NewWord> child : smartForest.branches) {
        NewWord candidate = child.getParam();
        int status = child.getStatus();
        if (status == 2 || status == 3) {
            if (candidate.isActive() && (nature == null || candidate.getNature().equals(nature))) {
                hm.put(candidate.getName(), candidate.getScore());
            }
        }
        if (status != 3) {
            valueResult(child, hm, nature);
        }
    }
}
 
示例5
/**
 * Wraps every keyword occurrence found in {@code content} with
 * {@code beginTag} and {@code endTag}.
 *
 * <p>Matching is done on lower-cased copies of the keywords and the content;
 * the text that is emitted is cut from the original {@code content}.
 *
 * @param keyWords keywords to mark, each added to a temporary trie with its score
 * @param content  text to scan
 * @return the content with each match surrounded by the tags
 */
public String tagContent(List<Keyword> keyWords, String content) {
    SmartForest<Double> dictionary = new SmartForest<>();
    for (Keyword keyWord : keyWords) {
        dictionary.add(keyWord.getName().toLowerCase(), keyWord.getScore());
    }

    // NOTE(review): offsets come from the lower-cased copy but index into the
    // original text; this assumes lower-casing keeps the length unchanged.
    SmartGetWord<Double> matcher = new SmartGetWord<>(dictionary, content.toLowerCase());

    StringBuilder tagged = new StringBuilder();
    int copiedUpTo = 0;
    for (String hit = matcher.getFrontWords(); hit != null; hit = matcher.getFrontWords()) {
        int hitEnd = matcher.offe + hit.length();
        tagged.append(content.substring(copiedUpTo, matcher.offe));
        tagged.append(beginTag);
        tagged.append(content.substring(matcher.offe, hitEnd));
        tagged.append(endTag);
        copiedUpTo = hitEnd;
    }

    // Copy whatever follows the last match.
    if (copiedUpTo < content.length()) {
        tagged.append(content.substring(copiedUpTo, content.length()));
    }

    return tagged.toString();
}
 
示例6
/**
 * Registers a library under {@code key}, dispatching on the key's prefix.
 *
 * <p>The prefix selects the target library and the type {@code value} is cast
 * to: {@code Forest} for the dic and ambiguity libraries, {@code StopRecognition}
 * for stop, {@code SmartForest} for synonyms and {@code SplitWord} for crf.
 * After a successful registration the key/path pair is also stored in {@code ENV}.
 *
 * @param key   library key; must start with one of the library {@code DEFAULT} prefixes
 * @param path  path associated with the library
 * @param value the library object, of the type matching the prefix
 * @throws LibraryException   if the key starts with none of the known prefixes
 * @throws ClassCastException if {@code value} is not of the type the prefix requires
 */
public static void putLibrary(String key, String path, Object value) {
    if (key.startsWith(DicLibrary.DEFAULT)) {
        DicLibrary.put(key, path, (Forest) value);
    } else if (key.startsWith(StopLibrary.DEFAULT)) {
        StopLibrary.put(key, path, (StopRecognition) value);
    } else if (key.startsWith(SynonymsLibrary.DEFAULT)) {
        SynonymsLibrary.put(key, path, (SmartForest) value);
    } else if (key.startsWith(AmbiguityLibrary.DEFAULT)) {
        AmbiguityLibrary.put(key, path, (Forest) value);
    } else if (key.startsWith(CrfLibrary.DEFAULT)) {
        CrfLibrary.put(key, path, (SplitWord) value);
    } else {
        // Build the list from the prefix constants so that every accepted
        // prefix (including crf, which the old literal omitted) is reported.
        throw new LibraryException(key + " type err must start with " + DicLibrary.DEFAULT + "," + StopLibrary.DEFAULT + ","
                + AmbiguityLibrary.DEFAULT + "," + SynonymsLibrary.DEFAULT + "," + CrfLibrary.DEFAULT);
    }
    ENV.put(key, path);
}
 
示例7
/**
 * Returns the synonym forest registered under {@code key}.
 *
 * <p>If the key is not registered yet but is present in
 * {@code MyStaticValue.ENV}, it is registered via {@code putIfAbsent} and the
 * lookup is retried. A registered entry whose forest is still null is
 * initialised through {@code init} before being returned.
 *
 * @param key key of the synonym library
 * @return the forest for the key, or {@code null} (after logging a warning)
 *         when the key is neither registered nor configured
 */
public static SmartForest<List<String>> get(String key) {
    KV<String, SmartForest<List<String>>> kv = SYNONYMS.get(key);

    if (kv == null) {
        if (MyStaticValue.ENV.containsKey(key)) {
            putIfAbsent(key, MyStaticValue.ENV.get(key));
            return get(key);
        }
        // This is the synonyms lookup; the message used to say "crf".
        LOG.warn("synonyms " + key + " not found in config ");
        return null;
    }

    SmartForest<List<String>> forest = kv.getV();
    if (forest == null) {
        forest = init(key, kv, false);
    }
    return forest;
}
 
示例8
/**
 * Collects the synonym lists of all given words from the library registered
 * under {@code key}.
 *
 * @param key   key of the synonym library to query
 * @param words words whose synonym lists are merged; may be null
 * @return the union of the parameter lists stored for the words; empty when
 *         the library cannot be obtained, {@code words} is null, or nothing matches
 */
private static Set<String> findAllWords(String key, String[] words) {
    Set<String> found = new HashSet<>();

    SmartForest<List<String>> synonyms = get(key);
    // get(key) may yield null for an unknown key; treat that as "no synonyms".
    if (synonyms == null || words == null) {
        return found;
    }

    for (String word : words) {
        SmartForest<List<String>> branch = synonyms.getBranch(word);
        if (branch == null) {
            continue;
        }
        List<String> params = branch.getParam();
        if (params != null) {
            found.addAll(params);
        }
    }
    return found;
}
 
示例9
/**
 * Removes one word from its synonym group, e.g. [a, b, c] -> remove(c) -> [a, b].
 *
 * <p>The word is removed from the forest, its branch's parameter is cleared,
 * and the word is taken out of the group list. If exactly one word is left in
 * the group afterwards, that word is removed from the forest and the list as
 * well. Nothing happens when the library cannot be obtained, or when the word
 * has no branch or a branch with status below 2.
 *
 * @param key  key of the synonym library to modify
 * @param word the word to take out of its group
 */
public static void remove(String key, String word) {

    SmartForest<List<String>> synonyms = get(key);
    if (synonyms == null) {
        // get(key) may yield null for an unknown key; nothing to remove then.
        return;
    }

    SmartForest<List<String>> branch = synonyms.getBranch(word);

    if (branch == null || branch.getStatus() < 2) {
        return;
    }

    List<String> params = branch.getParam();

    synonyms.remove(word);
    branch.setParam(null);
    if (params == null) {
        // The branch carried no group list, so there is no group to shrink.
        return;
    }
    params.remove(word);

    if (params.size() == 1) { // a group of one is no group: drop the last member too
        synonyms.remove(params.get(0));
        params.remove(0);
    } else {
        // List.remove(Object) drops only the first occurrence; this second
        // call removes one further occurrence if the word was listed twice.
        params.remove(word);
    }
}
 
示例10
/**
 * Builds a dictionary backed by a trie that holds every given text, each
 * stored with the value {@code true}.
 *
 * @param texts the entries to put into the dictionary
 * @return an {@code AnsjDictionary} wrapping the populated trie
 */
@Override
protected NlpDictionary getDictionary(String... texts) {
    SmartForest<Boolean> trie = new SmartForest<Boolean>();
    for (int index = 0; index < texts.length; index++) {
        trie.add(texts[index], true);
    }
    return new AnsjDictionary(trie);
}
 
示例11
/**
 * Builds the user-defined DAT from a dictionary file.
 *
 * <p>First every non-blank line is split on tabs, turned into a fresh
 * {@code cla} instance via {@code init}, and stored in a trie under the
 * line's first field. The trie is then flattened with {@code tree2List} and
 * handed to {@code makeDAT}. Both phases log their elapsed time.
 *
 * @param dicPath path of the dictionary file, opened with {@code IOUtil.UTF8}
 * @param cla     item class instantiated once per dictionary line
 * @throws FileNotFoundException  if no file iterator could be obtained for {@code dicPath}
 * @throws InstantiationException if an item instance cannot be created
 * @throws IllegalAccessException if the item constructor is not accessible
 */
public void maker(final String dicPath, final Class<? extends Item> cla) throws FileNotFoundException, InstantiationException, IllegalAccessException {
	final long trieStart = System.currentTimeMillis();
	LOG.info("make basic tire begin !");

	final SmartForest<Item> forest = new SmartForest<Item>();
	final FileIterator lines = IOUtil.instanceFileIterator(dicPath, IOUtil.UTF8);
	if (lines == null) {
		throw new FileNotFoundException();
	}
	try {
		while (lines.hasNext()) {
			final String line = lines.next();
			if (!StringUtil.isBlank(line)) {
				final Item item = cla.newInstance();
				final String[] fields = line.split("\t");
				item.init(fields);
				// The first tab-separated field is the trie key.
				forest.add(fields[0], item);
			}
		}
	} finally {
		lines.close();
	}
	LOG.info("make basic tire over use time " + (System.currentTimeMillis() - trieStart) + " ms");

	final long datStart = System.currentTimeMillis();
	LOG.info("make dat tire begin !");
	makeDAT(tree2List(cla, forest));
	LOG.info("make dat tire over use time " + (System.currentTimeMillis() - datStart) + " ms! dat len is " + datArrLen() + "! dat size is " + datItemSize());
}
 
示例12
/**
 * Merge-sorts the range {@code [low, high)} of {@code dest}, using
 * {@code src} as the scratch copy. Elements are ordered by comparing one
 * branch against the character ({@code getC()}) of another.
 *
 * @param src  source array; holds the same elements as {@code dest} on entry, shifted by {@code off}
 * @param dest destination array that receives the sorted range
 * @param low  first index (inclusive) of the range in {@code dest}
 * @param high last index (exclusive) of the range in {@code dest}
 * @param off  offset added to {@code low}/{@code high} to obtain the matching indices in {@code src}
 */
private static void mergeSort(SmartForest[] src, SmartForest[] dest, int low, int high, int off) {
	final int span = high - low;

	// Short ranges are sorted in place with insertion sort.
	if (span < INSERTIONSORT_THRESHOLD) {
		for (int end = low; end < high; end++) {
			int pos = end;
			while (pos > low && dest[pos - 1].compareTo(dest[pos].getC()) > 0) {
				swap(dest, pos, pos - 1);
				pos--;
			}
		}
		return;
	}

	// Sort both halves of dest into src (roles of the arrays are swapped).
	final int outFrom = low;
	final int outTo = high;
	final int srcFrom = low + off;
	final int srcTo = high + off;
	final int srcMid = (srcFrom + srcTo) >>> 1;
	mergeSort(dest, src, srcFrom, srcMid, -off);
	mergeSort(dest, src, srcMid, srcTo, -off);

	// Halves already in order relative to each other: a plain copy suffices.
	if (src[srcMid - 1].compareTo(src[srcMid].getC()) <= 0) {
		System.arraycopy(src, srcFrom, dest, outFrom, span);
		return;
	}

	// Merge the two sorted halves of src back into dest.
	int left = srcFrom;
	int right = srcMid;
	for (int out = outFrom; out < outTo; out++) {
		final boolean takeLeft = right >= srcTo || (left < srcMid && src[left].compareTo(src[right].getC()) <= 0);
		if (takeLeft) {
			dest[out] = src[left++];
		} else {
			dest[out] = src[right++];
		}
	}
}
 
示例13
/**
 * Inserts a word into the forest one character at a time.
 *
 * <p>Every character but the last is added as a node with status 1 and no
 * parameter; the last character is added with status 3 and {@code param}.
 *
 * @param forest root of the trie to insert into
 * @param temp   the word to insert
 * @param param  parameters attached to the word's final node
 */
private static void insertWord(Forest forest, String temp, String... param) {
	final char[] letters = temp.toCharArray();
	final int last = letters.length - 1;
	SmartForest<String[]> node = forest;
	for (int pos = 0; pos <= last; pos++) {
		final char letter = letters[pos];
		if (pos < last) {
			node.add(new Forest(letter, 1, null));
		} else {
			node.add(new Forest(letter, 3, param));
		}
		node = node.getBranch(letter);
	}
}
 
示例14
/**
 * Removes a word from the forest.
 *
 * <p>The word's path is followed character by character; at the last
 * character a node with status -1 and no parameter is added to the current
 * branch. The walk stops early if the path runs into a missing branch.
 *
 * @param forest root of the trie to remove from
 * @param word   the word to remove
 */
public static void removeWord(Forest forest, String word) {
	final char[] letters = word.toCharArray();
	final int last = letters.length - 1;
	SmartForest<String[]> node = forest;

	for (int pos = 0; pos < letters.length && node != null; pos++) {
		if (pos == last) {
			node.add(new Forest(letters[pos], -1, null));
		}
		node = node.getBranch(letters[pos]);
	}
}
 
示例15
/**
 * Demo: fills a trie with a few words, scans a sample text with
 * {@code getAllWords} and prints each match with its parameter, followed by
 * the elapsed milliseconds.
 */
public static void main(String[] args) {
	final long startedAt = System.currentTimeMillis();

	// Dictionary construction: one word per entry followed by its parameter.
	// The entries could equally be read from a file or a reader stream.
	final String[] entries = { "中国", "android", "java", "jav", "中国人", "国人", "0", "3" };
	SmartForest<Integer> forest = new SmartForest<Integer>();
	for (String entry : entries) {
		forest.add(entry, 3);
	}

	final String content = StringUtil.rmHtmlTag(" Android-java-中国人00000000000000 1230 013 33333");

	SmartGetWord<Integer> udg = forest.getWord(content.toLowerCase().toCharArray());
	for (String word = udg.getAllWords(); word != null; word = udg.getAllWords()) {
		System.out.println(word + "\t" + udg.getParam());
	}

	System.out.println(System.currentTimeMillis() - startedAt);
}
 
示例16
/**
 * Fills a trie with a few words, removes one of them again, then scans a
 * sample text with {@code getFrontWords} and prints each match with its
 * parameter, followed by the elapsed milliseconds.
 */
@Test
public void test() {
	final long startedAt = System.currentTimeMillis();

	// Dictionary construction: one word per entry followed by its parameter.
	// The entries could equally be read from a file or a reader stream.
	final String[] entries = { "中国", "android", "java", "中国人" };
	SmartForest<Integer> forest = new SmartForest<Integer>();
	for (String entry : entries) {
		forest.add(entry, 3);
	}

	forest.remove("中国人");

	final String content = StringUtil.rmHtmlTag(" Android-java-中国人");

	SmartGetWord<Integer> udg = forest.getWord(content.toLowerCase().toCharArray());
	for (String word = udg.getFrontWords(); word != null; word = udg.getFrontWords()) {
		System.out.println(word + "\t" + udg.getParam());
	}

	System.out.println(System.currentTimeMillis() - startedAt);
}
 
示例17
/**
 * Follows the characters of a term's name down from {@code branch} and
 * returns the node reached.
 *
 * @param branch node to start the descent from
 * @param term   term whose name is walked character by character
 * @return the node reached after the last character (which is {@code branch}
 *         itself for an empty name), or {@code null} if the path breaks off
 */
private SmartForest<String[]> termStatus(SmartForest<String[]> branch, Term term) {
    SmartForest<String[]> node = branch;
    for (char letter : term.getName().toCharArray()) {
        node = node.get(letter);
        if (node == null) {
            return null;
        }
    }
    return node;
}
 
示例18
/**
 * Guesses the nature (part of speech) of a word by rule.
 *
 * <p>The word is matched from its last character backwards against
 * {@code SUFFIX_FOREST}; the nature is taken from the deepest node with
 * status 2 or 3 that is reached (a status 3 node ends the walk). The result is
 * then chosen in this order: NT for an "nt" suffix when more than one
 * character matched or the word is longer than 3; NS for an "ns" suffix; for
 * words shorter than 5, NR if the analysis contains an "nr" term; for longer
 * words, NRF if the word is recognised as a foreign name; NW otherwise.
 *
 * @param word the word to classify
 * @return the guessed nature, {@code TermNatures.NW} when no rule applies
 */
public static TermNatures guessNature(String word) {
    String nature = null;
    int matched = 0;
    SmartForest<String[]> node = SUFFIX_FOREST;
    for (int pos = word.length() - 1; pos >= 0; pos--) {
        node = node.get(word.charAt(pos));
        if (node == null) {
            break;
        }
        matched++;
        final int status = node.getStatus();
        if (status == 2 || status == 3) {
            nature = node.getParam()[0];
            if (status == 3) {
                break;
            }
        }
    }

    if ("nt".equals(nature) && (matched > 1 || word.length() > 3)) {
        return TermNatures.NT;
    }
    if ("ns".equals(nature)) {
        return TermNatures.NS;
    }
    if (word.length() < 5) {
        // Short words are never checked against the foreign-name rule.
        for (Term term : ToAnalysis.parse(word).getTerms()) {
            if ("nr".equals(term.getNatureStr())) {
                return TermNatures.NR;
            }
        }
        return TermNatures.NW;
    }
    if (ForeignPersonRecognition.isFName(word)) {
        return TermNatures.NRF;
    }
    return TermNatures.NW;
}
 
示例19
/**
 * Attaches synonyms to the terms of a result.
 *
 * <p>For each term, the branch stored under the term's name is looked up; if
 * it exists, has a status greater than 1 and carries a non-null list, that
 * list is set as the term's synonyms.
 *
 * @param result the terms to annotate
 */
@Override
public void recognition(Result result) {
    for (Term term : result) {
        SmartForest<List<String>> node = synonyms.getBranch(term.getName());
        if (node == null || node.getStatus() <= 1) {
            continue;
        }
        List<String> found = node.getParam();
        if (found != null) {
            term.setSynonyms(found);
        }
    }
}
 
示例20
/**
 * Looks up the parameters stored for a word in a single forest.
 *
 * @param forest the trie to search
 * @param word   the word to look up, walked character by character
 * @return the parameters of the node the word ends on when that node's status
 *         is greater than 1; {@code null} if the path breaks off or the
 *         status is 1 or lower
 */
public static String[] getParams(Forest forest, String word) {
    SmartForest<String[]> node = forest;
    for (char letter : word.toCharArray()) {
        node = node.get(letter);
        if (node == null) {
            return null;
        }
    }
    return node.getStatus() > 1 ? node.getParam() : null;
}
 
示例21
/**
 * Tries to activate a new word: if a branch with a stored word exists under
 * {@code name}, that word is marked active; otherwise nothing happens.
 *
 * @param name name of the word to activate
 */
public void active(String name) {
    SmartForest<NewWord> node = sf.getBranch(name);
    if (node == null || node.getParam() == null) {
        return;
    }
    node.getParam().setActive(true);
}
 
示例22
/**
 * Returns the weight array stored for a feature.
 *
 * @param chars the characters identifying the feature in {@code featureTree}
 * @return the stored weight array, or {@code null} when {@code chars} is
 *         null, no branch exists for it, or the branch has no array
 */
public float[] getFeature(char... chars) {
    if (chars == null) {
        return null;
    }
    SmartForest<float[]> node = featureTree.getBranch(chars);
    return node == null ? null : node.getParam();
}
 
示例23
/**
 * Loads a CRF model from a gzip-compressed object stream.
 *
 * <p>Reads, in order: a UTF string (discarded), the status matrix, the
 * template used to build the {@code Config}, and then groups of features.
 * Each group starts with two ints (array width and entry count) followed by
 * that many name/weight-array pairs, which are added to {@code featureTree}.
 *
 * @param is stream holding the gzip-compressed model
 * @return this model
 * @throws Exception if reading or deserialising the stream fails
 */
@Override
public CRFModel loadModel(InputStream is) throws Exception {
    final long begin = System.currentTimeMillis();
    // NOTE(review): this uses native Java deserialization; it should only be
    // fed model files from a trusted source.
    try (ObjectInputStream in = new ObjectInputStream(new GZIPInputStream(is))) {
        in.readUTF();
        this.status = (float[][]) in.readObject();
        this.config = new Config((int[][]) in.readObject());
        featureTree = new SmartForest<float[]>();

        int win;
        int size;
        do {
            win = in.readInt();
            size = in.readInt();
            for (int entry = 0; entry < size; entry++) {
                String name = in.readUTF();
                float[] weights = new float[win];
                for (int slot = 0; slot < weights.length; slot++) {
                    weights[slot] = in.readFloat();
                }
                featureTree.add(name, weights);
            }
            // NOTE(review): the loop repeats only while win or size is zero, so
            // it stops after the first non-empty group. If the format can hold
            // several groups this condition looks inverted; confirm against the writer.
        } while (win == 0 || size == 0);
        logger.info("load crf model ok ! use time :" + (System.currentTimeMillis() - begin));
    }
    return this;
}
 
示例24
/**
 * Computes the score of a sentence.
 *
 * <p>Every word of {@code forest} found in the sentence text updates the
 * sentence score with the word's stored value. Afterwards a score that is
 * still zero is replaced by the text length times -0.005; any other score is
 * divided by the natural logarithm of (text length + 3).
 *
 * @param sentence the sentence whose {@code score} field is updated
 * @param forest   trie mapping words to their score contribution
 */
private void computeScore(Sentence sentence, SmartForest<Double> forest) {
    SmartGetWord<Double> matcher = new SmartGetWord<>(forest, sentence.value);
    for (String hit = matcher.getFrontWords(); hit != null; hit = matcher.getFrontWords()) {
        sentence.updateScore(hit, matcher.getParam());
    }
    if (sentence.score != 0) {
        sentence.score /= Math.log(sentence.value.length() + 3);
    } else {
        sentence.score = sentence.value.length() * -0.005;
    }
}
 
示例25
/**
 * Removes the library registered under a key.
 *
 * <p>If the entry exists and already holds a forest, the forest is cleared
 * first. The key is then removed from {@code MyStaticValue.ENV} and from
 * {@code SYNONYMS}.
 *
 * @param key key of the library to remove
 * @return whatever {@code SYNONYMS.remove(key)} returns for the key
 */
public static KV<String, SmartForest<List<String>>> remove(String key) {
    KV<String, SmartForest<List<String>>> entry = SYNONYMS.get(key);
    SmartForest<List<String>> forest = (entry == null) ? null : entry.getV();
    if (forest != null) {
        // Clear before dropping the entry.
        forest.clear();
    }
    MyStaticValue.ENV.remove(key);
    return SYNONYMS.remove(key);
}
 
示例26
/**
 * Creates a dictionary backed by the given forest. The reference is stored
 * as-is; no copy is made.
 *
 * @param forest the trie this dictionary wraps
 */
public AnsjDictionary(SmartForest<?> forest) {
    this.forest = forest;
}
 
示例27
/**
 * Flattens the trie into a list of items by delegating to
 * {@code treeToLibrary}, starting at the root with an empty prefix.
 * (The original comment describes the traversal as breadth-first; the actual
 * order is determined by {@code treeToLibrary}.)
 *
 * @param cla    item class passed through to {@code treeToLibrary}
 * @param forest root of the trie to flatten
 * @return the list filled by {@code treeToLibrary}
 * @throws InstantiationException if thrown by {@code treeToLibrary}
 * @throws IllegalAccessException if thrown by {@code treeToLibrary}
 */
private List<Item> tree2List(final Class<? extends Item> cla, final SmartForest<Item> forest) throws InstantiationException, IllegalAccessException {
	final List<Item> flattened = new ArrayList<Item>();
	treeToLibrary(cla, flattened, forest, "");
	return flattened;
}
 
示例28
/**
 * Sorts the whole array in place via {@code mergeSort}, using a clone of the
 * array as the scratch copy.
 *
 * @param a the array to sort
 */
public static void sort(SmartForest[] a) {
	mergeSort(a.clone(), a, 0, a.length, 0);
}
 
示例29
/**
 * Sorts the range {@code [fromIndex, toIndex)} of the array in place via
 * {@code mergeSort}, after validating the range with {@code rangeCheck}.
 *
 * @param a         the array to sort
 * @param fromIndex first index (inclusive) of the range
 * @param toIndex   last index (exclusive) of the range
 */
public static void sort(SmartForest[] a, int fromIndex, int toIndex) {
	rangeCheck(a.length, fromIndex, toIndex);
	// The scratch copy holds only the range, hence the negative offset.
	mergeSort(copyOfRange(a, fromIndex, toIndex), a, fromIndex, toIndex, -fromIndex);
}
 
示例30
/**
 * Exchanges the elements at positions {@code a} and {@code b} of {@code x}.
 */
private static void swap(SmartForest[] x, int a, int b) {
	final SmartForest held = x[b];
	x[b] = x[a];
	x[a] = held;
}