{"id":27408,"library":"soynlp","title":"soynlp","description":"An unsupervised Korean Natural Language Processing toolkit for tokenization, stemming, part-of-speech tagging, and noun extraction. The current version is 0.0.493. Development has stalled since 2020; the repository is archived and no longer maintained as of version 0.1.1 (which is not on PyPI).","status":"deprecated","version":"0.0.493","language":"python","source_language":"en","source_url":"https://github.com/lovit/soynlp","tags":["korean-nlp","unsupervised","tokenization","noun-extraction"],"install":[{"cmd":"pip install soynlp","lang":"bash","label":"PyPI install"}],"dependencies":[{"reason":"Used for system resource monitoring during tokenization","package":"psutil","optional":false},{"reason":"Used for some vectorization utilities","package":"scikit-learn","optional":true}],"imports":[{"note":"Correct import for noun extraction","symbol":"soynlp.noun.LRNounExtractor_v2","correct":"from soynlp.noun import LRNounExtractor_v2"},{"note":"Correct import for tokenization","symbol":"soynlp.tokenizer.MaxScoreTokenizer","correct":"from soynlp.tokenizer import MaxScoreTokenizer"},{"note":"Correct import for text normalization (repeat char, emoticon, etc.)","symbol":"soynlp.normalizer","correct":"from soynlp import normalizer"},{"note":"Correct import for word extraction (note: 'word' not 'words')","symbol":"soynlp.word.WordExtractor","correct":"from soynlp.word import WordExtractor"}],"quickstart":{"code":"from soynlp import DoublespaceLineCorpus\nfrom soynlp.word import WordExtractor\nfrom soynlp.tokenizer import LTokenizer\n\ncorpus = DoublespaceLineCorpus('dataset.txt', iter_sent=True)\nword_extractor = WordExtractor()\nword_extractor.train(corpus)\nscores = word_extractor.extract()\nscores = {word:score.cohesion_forward for word, score in scores.items()}\ntokenizer = LTokenizer(scores=scores)\ntext = '한국어 자연어 처리'\nprint(tokenizer.tokenize(text))","lang":"python","description":"Basic unsupervised tokenization using word extraction scores."},"warnings":[{"fix":"Consider migrating to modern Korean NLP libraries such as Kiwi (kiwipiepy), KoNLPy, or Hugging Face tokenizers.","message":"The repository is archived on GitHub (last release 0.1.1, not on PyPI). PyPI version 0.0.493 is several years old and will not receive updates.","severity":"deprecated","affected_versions":"all"},{"fix":"Replace 'from soynlp.words import WordExtractor' with 'from soynlp.word import WordExtractor'.","message":"In some versions, import paths changed. Using 'soynlp.words' (with an 's') will fail; use 'soynlp.word' instead.","severity":"breaking","affected_versions":">=0.0.400"},{"fix":"Extract scores as shown in quickstart: scores = {word:score.cohesion_forward for word, score in scores.items()}.","message":"The LTokenizer score dictionary must be a dict mapping word to a float score (e.g., cohesion_forward). Passing raw WordExtractor output will cause a TypeError.","severity":"gotcha","affected_versions":"all"},{"fix":"Ensure the input file has one document per line, with sentences separated by two spaces, or preprocess accordingly.","message":"DoublespaceLineCorpus expects one document per line, with the sentences in each document separated by two spaces. Lines that do not follow this format will not be split into sentences correctly.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-04-27T00:00:00.000Z","next_check":"2026-07-26T00:00:00.000Z","problems":[{"fix":"Use 'from soynlp.word import WordExtractor' instead.","cause":"Incorrect import path; the correct module is 'soynlp.word' (singular).","error":"ModuleNotFoundError: No module named 'soynlp.words'"},{"fix":"Extract scores: scores = {word:score.cohesion_forward for word, score in word_extractor.extract().items()}","cause":"Passing the WordExtractor object directly as a tokenizer score dictionary; expected a dict.","error":"TypeError: 'WordExtractor' object is not iterable"},{"fix":"Call word_extractor.train(corpus) before word_extractor.extract()","cause":"Calling extract() without calling train() first on a corpus.","error":"ValueError: No corpus was trained"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}