{"library":"sentencepiece","title":"SentencePiece","description":"SentencePiece is an unsupervised text tokenizer and detokenizer, primarily designed for Neural Network-based text generation systems where the vocabulary size is predetermined. It implements subword units like Byte-Pair Encoding (BPE) and Unigram Language Model, capable of training directly from raw sentences without pre-tokenization. The library is actively maintained with regular updates. The current version is 0.2.1.","language":"python","status":"active","last_verified":"Tue May 19","install":{"commands":["pip install sentencepiece"],"cli":{"name":"spm_encode","version":"sh: 1: spm_encode: not found"}},"imports":["import sentencepiece as spm\nsp = spm.SentencePieceProcessor()","import sentencepiece as spm\nspm.SentencePieceTrainer.train(...)"],"auth":{"required":false,"env_vars":[]},"quickstart":{"code":"import sentencepiece as spm\nimport os\n\n# Create a dummy text file for training\ncorpus_content = \"This is a test sentence. SentencePiece is awesome.\\nAnother example sentence for training.\"\ncorpus_file = \"corpus.txt\"\nwith open(corpus_file, \"w\", encoding=\"utf-8\") as f:\n    f.write(corpus_content)\n\nmodel_prefix = \"m_model\"\nvocab_size = 8000\n\n# Train a SentencePiece model\nspm.SentencePieceTrainer.train(\n    input=corpus_file,\n    model_prefix=model_prefix,\n    vocab_size=vocab_size\n)\n\n# Load the trained model\nsp = spm.SentencePieceProcessor()\nsp.load(f\"{model_prefix}.model\")\n\n# Encode text\ntext_to_encode = \"SentencePiece tokenization is powerful.\"\nencoded_pieces = sp.encode_as_pieces(text_to_encode)\nencoded_ids = sp.encode_as_ids(text_to_encode)\n\nprint(f\"Original text: {text_to_encode}\")\nprint(f\"Encoded pieces: {encoded_pieces}\")\nprint(f\"Encoded IDs: {encoded_ids}\")\n\n# Decode IDs back to text\ndecoded_text = sp.decode_ids(encoded_ids)\nprint(f\"Decoded text: {decoded_text}\")\n\n# Clean up generated model files\nos.remove(f\"{model_prefix}.model\")\nos.remove(f\"{model_prefix}.vocab\")\nos.remove(corpus_file)","lang":"python","description":"This quickstart demonstrates how to train a SentencePiece model from a text file, load the trained model, and then use it to encode text into subword pieces and IDs, and decode IDs back to text. The `input` parameter for training expects a file path.","tag":"stale","tag_description":"widespread failures or data too old to trust","last_tested":"2026-04-24","results":[{"runtime":"python:3.10-alpine","exit_code":1},{"runtime":"python:3.10-slim","exit_code":1},{"runtime":"python:3.11-alpine","exit_code":1},{"runtime":"python:3.11-slim","exit_code":1},{"runtime":"python:3.12-alpine","exit_code":1},{"runtime":"python:3.12-slim","exit_code":1},{"runtime":"python:3.13-alpine","exit_code":1},{"runtime":"python:3.13-slim","exit_code":1},{"runtime":"python:3.9-alpine","exit_code":1},{"runtime":"python:3.9-slim","exit_code":1}]},"compatibility":{"tag":"draft","tag_description":"notable install failures or slow imports","last_tested":"2026-05-19","installed_version":"0.2.1","pypi_latest":"0.2.1","is_stale":false,"summary":{"python_range":"3.10–3.9","success_rate":50,"avg_install_s":1.6,"avg_import_s":0.06,"wheel_type":"wheel"},"results":[{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"sentencepiece","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"sentencepiece","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"sentencepiece","exit_code":1,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"sentencepiece","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":null,"install_time_s":1.6,"import_time_s":0.04,"mem_mb":2.5,"disk_size":"22M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"sentencepiece","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"noisy","install_time_s":1.6,"import_time_s":0.03,"mem_mb":2.5,"disk_size":"22M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"sentencepiece","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":0.03,"mem_mb":2.5,"disk_size":"22M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"sentencepiece","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"sentencepiece","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"sentencepiece","exit_code":1,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"sentencepiece","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":null,"install_time_s":1.8,"import_time_s":0.06,"mem_mb":2.8,"disk_size":"24M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"sentencepiece","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"noisy","install_time_s":1.6,"import_time_s":0.06,"mem_mb":2.8,"disk_size":"24M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim (glibc)","variant":"sentencepiece","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":0.07,"mem_mb":2.8,"disk_size":"24M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"sentencepiece","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"sentencepiece","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"sentencepiece","exit_code":1,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"sentencepiece","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":null,"install_time_s":1.5,"import_time_s":0.09,"mem_mb":3.5,"disk_size":"15M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"sentencepiece","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"noisy","install_time_s":1.5,"import_time_s":0.09,"mem_mb":3.5,"disk_size":"15M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"sentencepiece","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":0.09,"mem_mb":3.5,"disk_size":"15M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"sentencepiece","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"sentencepiece","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"sentencepiece","exit_code":1,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"sentencepiece","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":null,"install_time_s":1.5,"import_time_s":0.08,"mem_mb":3.3,"disk_size":"15M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"sentencepiece","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"noisy","install_time_s":1.5,"import_time_s":0.07,"mem_mb":3.3,"disk_size":"15M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"sentencepiece","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":0.08,"mem_mb":3.2,"disk_size":"15M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"sentencepiece","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"sentencepiece","exit_code":1,"wheel_type":null,"failure_reason":"build_error","import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"sentencepiece","exit_code":1,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":null},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"sentencepiece","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":null,"install_time_s":1.9,"import_time_s":0.04,"mem_mb":2.4,"disk_size":"21M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"sentencepiece","exit_code":0,"wheel_type":"wheel","failure_reason":null,"import_side_effects":"noisy","install_time_s":1.9,"import_time_s":0.03,"mem_mb":2.4,"disk_size":"21M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"sentencepiece","exit_code":0,"wheel_type":null,"failure_reason":null,"import_side_effects":null,"install_time_s":null,"import_time_s":0.04,"mem_mb":2.4,"disk_size":"21M"}]}}