{"payload":{"pageCount":2,"repositories":[{"type":"Public","name":"llmc","owner":"ModelTC","isFork":false,"description":"This is the official PyTorch implementation of \"LLMC: Benchmarking Large Language Model Quantization with a Versatile Compression Toolkit\".","allTopics":["benchmark","deployment","tool","evaluation","pruning","quantization","post-training-quantization","awq","large-language-models","llm","vllm","smoothquant","mixtral","internlm2","minicpm","llama3","omniquant","smollm","quarot","lightllm"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":5,"starsCount":229,"forksCount":25,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-20T12:39:51.266Z"}},{"type":"Public","name":"lightllm","owner":"ModelTC","isFork":false,"description":"LightLLM is a Python-based LLM (Large Language Model) inference and serving framework, notable for its lightweight design, easy scalability, and high-speed performance.","allTopics":["nlp","deep-learning","llama","gpt","model-serving","llm","openai-triton"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":5,"issueCount":59,"starsCount":2304,"forksCount":190,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-19T08:46:40.357Z"}},{"type":"Public","name":"EasyLLM","owner":"ModelTC","isFork":false,"description":"Built upon Megatron-Deepspeed and HuggingFace Trainer, EasyLLM has reorganized the code logic with a focus on usability. While enhancing usability, it also ensures training efficiency.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":38,"forksCount":7,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-18T08:42:34.831Z"}},{"type":"Public","name":"mtc-token-healing","owner":"ModelTC","isFork":false,"description":"Token healing implementation in Rust","allTopics":[],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":3,"issueCount":0,"starsCount":3,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-16T03:40:12.732Z"}},{"type":"Public","name":"DeepSpeed","owner":"ModelTC","isFork":true,"description":"DeepSpeed is a deep learning optimization library that makes distributed training and inference easy, efficient, and effective.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":4052,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-13T03:01:13.749Z"}},{"type":"Public","name":"opencompass","owner":"ModelTC","isFork":true,"description":"OpenCompass is an LLM evaluation platform, supporting a wide range of models (Llama3, Mistral, InternLM2,GPT-4,LLaMa2, Qwen,GLM, Claude, etc) over 100+ datasets.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":404,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-06T06:18:11.049Z"}},{"type":"Public","name":"xtuner","owner":"ModelTC","isFork":true,"description":"An efficient, flexible and full-featured toolkit for fine-tuning LLM (InternLM2, Llama3, Phi3, Qwen, Mistral, ...)","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":302,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-22T06:24:55.034Z"}},{"type":"Public","name":"InternVL","owner":"ModelTC","isFork":true,"description":"[CVPR 2024 Oral] InternVL Family: A Pioneering Open-Source Alternative to GPT-4o. 接近GPT-4o表现的可商用开源多模态对话模型","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":432,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-16T03:03:27.488Z"}},{"type":"Public","name":"general-sam","owner":"ModelTC","isFork":false,"description":"A general suffix automaton implementation in Rust with Python bindings","allTopics":[],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":1,"issueCount":0,"starsCount":4,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-12T12:29:14.481Z"}},{"type":"Public","name":"OmniBal","owner":"ModelTC","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":15,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-09T13:28:54.767Z"}},{"type":"Public","name":"TFMQ-DM","owner":"ModelTC","isFork":false,"description":"[CVPR 2024 Highlight] This is the official PyTorch implementation of \"TFMQ-DM: Temporal Feature Maintenance Quantization for Diffusion Models\".","allTopics":["highlight","quantization","cvpr","ldm","diffusion-models","post-training-quantization","ddim","stable-diffusion","cvpr2024"],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":0,"starsCount":53,"forksCount":3,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-01T07:45:51.119Z"}},{"type":"Public","name":"general-sam-py","owner":"ModelTC","isFork":false,"description":"Python bindings for general-sam and some utilities","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":0,"starsCount":3,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-22T23:17:41.867Z"}},{"type":"Public","name":"L2_Compression","owner":"ModelTC","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":11,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-16T13:11:09.322Z"}},{"type":"Public","name":"msbench","owner":"ModelTC","isFork":false,"description":"A tool for model sparse based on torch.fx","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":7,"forksCount":1,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-03T06:57:19.883Z"}},{"type":"Public","name":"MQBench","owner":"ModelTC","isFork":false,"description":"Model Quantization Benchmark","allTopics":[],"primaryLanguage":{"name":"Shell","color":"#89e051"},"pullRequestCount":5,"issueCount":3,"starsCount":754,"forksCount":137,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-03T00:54:41.967Z"}},{"type":"Public template","name":"FCPTS","owner":"ModelTC","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-14T07:19:01.008Z"}},{"type":"Public","name":"statecs","owner":"ModelTC","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":1,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-10T08:42:35.770Z"}},{"type":"Public","name":"greedy-tokenizer","owner":"ModelTC","isFork":false,"description":"Greedily tokenize strings with the longest tokens iteratively.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-27T12:02:50.777Z"}},{"type":"Public","name":"QLLM","owner":"ModelTC","isFork":false,"description":"[ICLR 2024] This is the official PyTorch implementation of \"QLLM: Accurate and Efficient Low-Bitwidth Quantization for Large Language Models\"","allTopics":["transformers","pytorch","llama","quantization","post-training-quantization","llm","llama2"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":33,"forksCount":2,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-11T02:56:00.115Z"}},{"type":"Public","name":"Dipoorlet","owner":"ModelTC","isFork":false,"description":"Offline Quantization Tools for Deploy.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":11,"starsCount":109,"forksCount":15,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-12-28T08:59:47.725Z"}},{"type":"Public","name":"awesome-lm-system","owner":"ModelTC","isFork":false,"description":"Summary of system papers/frameworks/codes/tools on training or serving large model","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":56,"forksCount":5,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-12-17T10:24:11.923Z"}},{"type":"Public","name":"LPCV_2023_solution","owner":"ModelTC","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":18,"forksCount":2,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-29T06:40:44.597Z"}},{"type":"Public","name":"Outlier_Suppression_Plus","owner":"ModelTC","isFork":false,"description":"Official implementation of the EMNLP23 paper: Outlier Suppression+: Accurate quantization of large language models by equivalent and optimal shifting and scaling","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":39,"forksCount":2,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-21T20:55:56.108Z"}},{"type":"Public","name":"UP_LPCV2023_Plugin","owner":"ModelTC","isFork":true,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-08-11T07:55:39.936Z"}},{"type":"Public","name":"ChatGLM-6B","owner":"ModelTC","isFork":true,"description":"ChatGLM-6B: An Open Bilingual Dialogue Language Model | 开源双语对话语言模型","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":5186,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-06-20T06:03:18.228Z"}},{"type":"Public","name":"pyvlova","owner":"ModelTC","isFork":false,"description":"Yet another Polyhedra Compiler for DeepLearning","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":19,"forksCount":4,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-04-14T17:28:07.956Z"}},{"type":"Public","name":"systemnoise_web","owner":"ModelTC","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-04-03T10:29:28.939Z"}},{"type":"Public","name":"NART","owner":"ModelTC","isFork":false,"description":"NART = NART is not A RunTime, a deep learning inference framework.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":38,"forksCount":14,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-03-02T14:50:37.065Z"}},{"type":"Public","name":"United-Perception","owner":"ModelTC","isFork":false,"description":"United Perception","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":27,"starsCount":426,"forksCount":65,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-12-05T05:12:54.221Z"}},{"type":"Public","name":"AAAI2023_EAMPD","owner":"ModelTC","isFork":false,"description":"AAAI2023 Efficient and Accurate Models towards Practical Deep Learning Baseline","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":2,"starsCount":13,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-11-29T13:51:35.232Z"}}],"repositoryCount":39,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"ModelTC repositories"}