{"library":"soynlp","title":"soynlp","type":"library","description":"An unsupervised Korean Natural Language Processing toolkit for tokenization, stemming, part-of-speech tagging, and noun extraction. The current PyPI release is 0.0.493. Development has stalled since 2020: the repository is archived and no longer maintained, and its final version, 0.1.1, is not on PyPI.","language":"python","status":"deprecated","last_verified":"Mon Apr 27","install":{"commands":["pip install soynlp"],"cli":null},"imports":["from soynlp.noun import LRNounExtractor_v2","from soynlp.tokenizer import MaxScoreTokenizer","from soynlp import normalize","from soynlp.word import WordExtractor"],"auth":{"required":false,"env_vars":[]},"links":{"homepage":"https://soynlp.github.io/soynlp/","github":"https://github.com/lovit/soynlp","docs":null,"changelog":null,"pypi":"https://pypi.org/project/soynlp/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null},"quickstart":{"code":"from soynlp import DoublespaceLineCorpus\nfrom soynlp.word import WordExtractor\nfrom soynlp.tokenizer import LTokenizer\n\ncorpus = DoublespaceLineCorpus('dataset.txt', iter_sent=True)\nword_extractor = WordExtractor(max_iter_learning_steps=100)\nword_extractor.train(corpus)\nscores = word_extractor.extract()\nscores = {word:score.cohesion_forward for word, score in scores.items()}\ntokenizer = LTokenizer(scores=scores)\ntext = '한국어 자연어 처리'\nprint(tokenizer.tokenize(text))","lang":"python","description":"Basic unsupervised tokenization using word extraction scores.","tag":null,"tag_description":null,"last_tested":null,"results":[]},"compatibility":null}