From a7650be502c52903f2e3cdfc82abb923b52decb6 Mon Sep 17 00:00:00 2001 From: DustinWin Date: Sat, 28 Dec 2024 02:37:50 +0800 Subject: [PATCH] Update domain-list-custom --- .github/dependabot.yml | 24 +++ .github/workflows/build.yml | 234 +++++++++++++++++++++++ .gitignore | 15 ++ LICENSE | 21 +++ README.md | 26 +++ common.go | 113 +++++++++++ go.mod | 14 ++ go.sum | 20 ++ listinfo.go | 362 ++++++++++++++++++++++++++++++++++++ listinfomap.go | 132 +++++++++++++ main.go | 133 +++++++++++++ trie.go | 73 ++++++++ 12 files changed, 1167 insertions(+) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/build.yml create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 common.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 listinfo.go create mode 100644 listinfomap.go create mode 100644 main.go create mode 100644 trie.go diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..aafa0fd4 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,24 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. 
+# Please see the documentation for all configuration options: +# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" + timezone: "Asia/Shanghai" + time: "07:00" + pull-request-branch-name: + separator: "-" + + - package-ecosystem: "gomod" + directory: "/" + schedule: + interval: "daily" + timezone: "Asia/Shanghai" + time: "07:00" + pull-request-branch-name: + separator: "-" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..ffcab096 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,234 @@ +name: Build domains +on: + workflow_dispatch: + schedule: + - cron: "0 18 * * *" + push: + branches: + - master + paths-ignore: + - "**/README.md" +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Set variables + run: | + echo "update_version=$(date -d '+8 hours' +%Y-%m-%d)" >> ${GITHUB_ENV} + echo "fakeip_filter=https://raw.githubusercontent.com/juewuy/ShellCrash/dev/public/fake_ip_filter.list" >> $GITHUB_ENV + echo "applications1=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/Download/Download.list" >> $GITHUB_ENV + echo "applications2=https://raw.githubusercontent.com/Loyalsoldier/clash-rules/release/applications.txt" >> $GITHUB_ENV + echo "private1=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/Lan/Lan.list" >> $GITHUB_ENV + echo "private2=https://raw.githubusercontent.com/XIU2/TrackersListCollection/master/all.txt" >> $GITHUB_ENV + echo "ads=https://raw.githubusercontent.com/privacy-protection-tools/anti-AD/master/anti-ad-clash.yaml" >> $GITHUB_ENV + echo "games_cn1=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/SteamCN/SteamCN.list" >> $GITHUB_ENV + echo 
"games_cn2=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/Game/GameDownloadCN/GameDownloadCN.list" >> $GITHUB_ENV + echo "netflix=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/Netflix/Netflix.list" >> $GITHUB_ENV + echo "disney=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/Disney/Disney.list" >> $GITHUB_ENV + echo "max=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/HBO/HBO.list" >> $GITHUB_ENV + echo "primevideo=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/PrimeVideo/PrimeVideo.list" >> $GITHUB_ENV + echo "appletv=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/AppleTV/AppleTV.list" >> $GITHUB_ENV + echo "youtube=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/YouTube/YouTube.list" >> $GITHUB_ENV + echo "tiktok=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/TikTok/TikTok.list" >> $GITHUB_ENV + echo "bilibili=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/BiliBili/BiliBili.list" >> $GITHUB_ENV + echo "ai1=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/OpenAI/OpenAI.list" >> $GITHUB_ENV + echo "ai2=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/Copilot/Copilot.list" >> $GITHUB_ENV + echo "ai3=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/Gemini/Gemini.list" >> $GITHUB_ENV + echo "ai4=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/Claude/Claude.list" >> $GITHUB_ENV + echo "networktest=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/Speedtest/Speedtest.list" >> $GITHUB_ENV + echo "proxy=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/Global/Global.list" >> $GITHUB_ENV 
+ echo "cn=https://raw.githubusercontent.com/blackmatrix7/ios_rule_script/master/rule/Clash/ChinaMax/ChinaMax.list" >> $GITHUB_ENV + shell: bash + + - name: Checkout codebase + uses: actions/checkout@master + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version-file: ./go.mod + + - name: Checkout v2fly/domain-list-community + uses: actions/checkout@v4 + with: + repository: v2fly/domain-list-community + path: community + + - name: Append attribute domains + run: | + echo "include:geolocation-!cn @cn" >> ./community/data/cn + echo "include:geolocation-cn @!cn" >> ./community/data/geolocation-\!cn + + - name: Generate `mihomo` geosite.dat and domains(.txt files) + run: | + go run ./ --datapath=./community/data/ + + - name: Generate fakeip-filter + run: | + mkdir -p ./tmp/ ./domains/ + curl -sSL ${fakeip_filter} | grep -Ev '#|\+|\*|Mijia' | sed 's/^/DOMAIN,/' > ./tmp/temp-fakeip-filter.txt + curl -sSL ${fakeip_filter} | grep -v '.\*' | grep -E '\*|\+' | sed 's/^[*+]\./DOMAIN-SUFFIX,/' >> ./tmp/temp-fakeip-filter.txt + curl -sSL ${fakeip_filter} | grep '.\*' | grep -E '\*|\+' | sed -e 's/^*/.\*/' -e 's/^+/.\+/' -e 's/^/DOMAIN-REGEX,/' >> ./tmp/temp-fakeip-filter.txt + sort --ignore-case ./tmp/temp-fakeip-filter.txt > ./domains/fakeip-filter.list + + - name: Generate fakeip-filter-lite + run: | + cat < ./tmp/temp-fakeip-filter-lite.txt + DOMAIN,adguardteam.github.io + DOMAIN,adrules.top + DOMAIN,anti-ad.net + DOMAIN,local.adguard.org + DOMAIN,static.adtidy.org + DOMAIN-SUFFIX,market.xiaomi.com + DOMAIN-SUFFIX,steamcontent.com + EOF + curl -sSL ${fakeip_filter} | grep -E 'ntp|time|stun' | grep -v '.\*' | grep -E '\*|\+' | sed 's/^[*+]\./DOMAIN-SUFFIX,/' >> ./tmp/temp-fakeip-filter-lite.txt + curl -sSL ${fakeip_filter} | grep -E 'ntp|time|stun' | grep '.\*' | grep -E '\*|\+' | sed -e 's/^*/.\*/' -e 's/^+/.\+/' -e 's/^/DOMAIN-REGEX,/' >> ./tmp/temp-fakeip-filter-lite.txt + sort --ignore-case ./tmp/temp-fakeip-filter-lite.txt > 
./domains/fakeip-filter-lite.list + + - name: Generate applications + run: | + curl -sSL ${applications1} | grep -Ev '#|IP-' > ./tmp/temp-applications.txt + curl -sSL ${applications2} | grep -v 'payload:' | sed -e 's/^[ \t]*- //' >> ./tmp/temp-applications.txt + sort --ignore-case -u ./tmp/temp-applications.txt > ./domains/applications.list + + - name: Generate private + run: | + cat <<EOF > ./tmp/temp-private.txt + DOMAIN,yacd.haishan.me + DOMAIN,yacd.metacubex.one + DOMAIN,d.metacubex.one + DOMAIN,metacubex.github.io + DOMAIN,metacubexd.pages.dev + DOMAIN,board.zash.run.place + EOF + sed -e 's/^full:/DOMAIN,/' -e 's/^domain:/DOMAIN-SUFFIX,/' -e 's/^keyword:/DOMAIN-KEYWORD,/' -e 's/^regexp:/DOMAIN-REGEX,/' ./publish/private.txt >> ./tmp/temp-private.txt + curl -sSL ${private1} | grep -Ev '#|IP-' >> ./tmp/temp-private.txt + curl -sSL ${private2} | grep -i '\.[A-Z]' | awk -F'[/:]' '{print $4}' | sed 's/^/DOMAIN,/' >> ./tmp/temp-private.txt + sort --ignore-case -u ./tmp/temp-private.txt > ./domains/private.list + + - name: Generate ads + run: | + curl -sSL ${ads} | grep -Ev '#|payload:' | sed -e '/^\s*$/d' -e "s/'$//" -e 's/^[ \t]*//' -e "s/^- '+\./DOMAIN-SUFFIX,/" -e "s/^- '/DOMAIN,/" | sort --ignore-case > ./domains/ads.list + + - name: Generate microsoft-cn + run: | + grep '@cn$' ./publish/microsoft.txt | sed -e 's/^full:/DOMAIN,/' -e 's/^domain:/DOMAIN-SUFFIX,/' -e 's/^keyword:/DOMAIN-KEYWORD,/' -e 's/^regexp:/DOMAIN-REGEX,/' -e 's/:@cn$//' | sort --ignore-case > ./domains/microsoft-cn.list + + - name: Generate apple-cn + run: | + grep '@cn$' ./publish/apple.txt | sed -e 's/^full:/DOMAIN,/' -e 's/^domain:/DOMAIN-SUFFIX,/' -e 's/^keyword:/DOMAIN-KEYWORD,/' -e 's/^regexp:/DOMAIN-REGEX,/' -e 's/:@cn$//' | sort --ignore-case > ./domains/apple-cn.list + + - name: Generate google-cn + run: | + grep '@cn$' ./publish/google.txt | sed -e 's/^full:/DOMAIN,/' -e 's/^domain:/DOMAIN-SUFFIX,/' -e 's/^keyword:/DOMAIN-KEYWORD,/' -e 's/^regexp:/DOMAIN-REGEX,/' -e 's/:@cn$//' | sort 
--ignore-case > ./domains/google-cn.list + + - name: Generate games-cn + run: | + grep '@cn$' ./publish/category-games.txt | sed -e 's/^full:/DOMAIN,/' -e 's/^domain:/DOMAIN-SUFFIX,/' -e 's/^keyword:/DOMAIN-KEYWORD,/' -e 's/^regexp:/DOMAIN-REGEX,/' -e 's/:@cn$//' > ./tmp/temp-games-cn.txt + curl -sSL ${games_cn1} | grep -Ev '#|IP-' >> ./tmp/temp-games-cn.txt + curl -sSL ${games_cn2} | grep -Ev '#|IP-' >> ./tmp/temp-games-cn.txt + sort --ignore-case -u ./tmp/temp-games-cn.txt > ./domains/games-cn.list + + - name: Generate netflix + run: | + curl -sSL ${netflix} | grep -Ev '#|IP-' | sort --ignore-case > ./domains/netflix.list + + - name: Generate disney + run: | + curl -sSL ${disney} | grep -Ev '#|IP-' | sort --ignore-case > ./domains/disney.list + + - name: Generate max + run: | + curl -sSL ${max} | grep -Ev '#|IP-' | sort --ignore-case > ./domains/max.list + + - name: Generate primevideo + run: | + curl -sSL ${primevideo} | grep -Ev '#|IP-' | sort --ignore-case > ./domains/primevideo.list + + - name: Generate appletv + run: | + curl -sSL ${appletv} | grep -Ev '#|IP-' | sort --ignore-case > ./domains/appletv.list + + - name: Generate youtube + run: | + curl -sSL ${youtube} | grep -Ev '#|IP-' | sort --ignore-case > ./domains/youtube.list + + - name: Generate tiktok + run: | + curl -sSL ${tiktok} | grep -Ev '#|IP-' | sort --ignore-case > ./domains/tiktok.list + + - name: Generate bilibili + run: | + curl -sSL ${bilibili} | grep -Ev '#|IP-' | sort --ignore-case > ./domains/bilibili.list + + - name: Generate ai + run: | + curl -sSL ${ai1} | grep -Ev '#|IP-' > ./tmp/temp-ai.txt + curl -sSL ${ai2} | grep -Ev '#|IP-' >> ./tmp/temp-ai.txt + curl -sSL ${ai3} | grep -Ev '#|IP-' >> ./tmp/temp-ai.txt + curl -sSL ${ai4} | grep -Ev '#|IP-' >> ./tmp/temp-ai.txt + sort --ignore-case -u ./tmp/temp-ai.txt > ./domains/ai.list + + - name: Generate networktest + run: | + cat <<EOF > ./tmp/temp-networktest.txt + DOMAIN-KEYWORD,ipv6-test + DOMAIN-KEYWORD,ipv6test + DOMAIN-KEYWORD,test-ipv6 + 
DOMAIN-KEYWORD,testipv6 + EOF + curl -sSL ${networktest} | grep -Ev '#|IP-' >> ./tmp/temp-networktest.txt + sort --ignore-case -u ./tmp/temp-networktest.txt > ./domains/networktest.list + + - name: Generate proxy + run: | + sed -e 's/^full:/DOMAIN,/' -e 's/^domain:/DOMAIN-SUFFIX,/' -e 's/^keyword:/DOMAIN-KEYWORD,/' -e 's/^regexp:/DOMAIN-REGEX,/' -e 's/:@\!cn$//' ./publish/geolocation-\!cn.txt >> ./tmp/temp-proxy.txt + curl -sSL ${proxy} | grep -Ev '#|IP-' >> ./tmp/temp-proxy.txt + sort --ignore-case -u ./tmp/temp-proxy.txt > ./domains/proxy.list + + - name: Generate cn + run: | + sed -e 's/^full:/DOMAIN,/' -e 's/^domain:/DOMAIN-SUFFIX,/' -e 's/^keyword:/DOMAIN-KEYWORD,/' -e 's/^regexp:/DOMAIN-REGEX,/' -e 's/:@cn$//' ./publish/cn.txt > ./tmp/temp-cn.txt + curl -sSL ${cn} | grep -Ev '#|IP-' >> ./tmp/temp-cn.txt + sort --ignore-case -u ./tmp/temp-cn.txt > ./domains/cn.list + rm -rf ./tmp* ./publish* + + - name: Release and upload `domains` assets + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + release_name: domains + tag: domains + overwrite: true + body: | + [mihomo](https://github.com/MetaCubeX/mihomo) rule-set 规则集文件,规则类型包含 `DOMAIN`、`DOMAIN-SUFFIX`、`DOMAIN-KEYWORD`、`DOMAIN-REGEX` 和 `PROCESS-NAME` + 规则集文件更新于 ${{ env.update_version }} + file_glob: true + file: ./domains/* + + - name: Git push assets to `domains` branch + run: | + cd ./domains/ || exit 1 + git init + git config --local user.name "github-actions[bot]" + git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" + git checkout -b domains + git add . 
&& git commit -m "mihomo rule-set 规则集文件更新于 ${update_version}" + git remote add origin "https://${{ github.actor }}:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}" + git push -f -u origin domains + + - name: Purge jsDelivr CDN + run: | + cd ./domains/ || exit 1 + for file in $(ls); do + curl -i "https://purge.jsdelivr.net/gh/${{ github.repository }}@domains/${file}" + done + + - name: Delete old workflow runs + uses: Mattraks/delete-workflow-runs@v2 + with: + token: ${{ secrets.GITHUB_TOKEN }} + repository: ${{ github.repository }} + retain_days: 3 + keep_minimum_runs: 1 diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..66fd13c9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,15 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..f42c4c20 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Loyalsoldier + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 00000000..749a6810 --- /dev/null +++ b/README.md @@ -0,0 +1,26 @@ +# 文件说明 +## 1. 文件类型 +[mihomo](https://github.com/MetaCubeX/mihomo) rule-set 规则集文件(.list 格式),包含 `DOMAIN`、`DOMAIN-SUFFIX`、`DOMAIN-KEYWORD`、`DOMAIN-REGEX` 和 `PROCESS-NAME` 规则类型,适用于 `behavior: classical` 且 `format: text` 的使用场景 +## 2. 数据源 +① 每天凌晨 2 点(北京时间 UTC+8)自动构建 +② **`fakeip-filter.list`** 源采用 [ShellCrash/public/fake_ip_filter.list](https://github.com/juewuy/ShellCrash/blob/dev/public/fake_ip_filter.list) +③ **`fakeip-filter-lite.list`** 源采用 [ShellCrash/public/fake_ip_filter.list](https://github.com/juewuy/ShellCrash/blob/dev/public/fake_ip_filter.list),仅保留主要域名(推荐搭配 [AdGuard Home](https://github.com/AdguardTeam/AdGuardHome) 且 DNS 配置 mix 混合模式时使用) +④ **`applications.list`** 源采用 [blackmatrix7/ios_rule_script/Download](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/Download) 和 [Loyalsoldier/clash-rules/applications.txt](https://github.com/Loyalsoldier/clash-rules/blob/release/applications.txt) 组合 +⑤ **`private.txt`** 源采用 [v2fly/domain-list-community/private](https://github.com/v2fly/domain-list-community/blob/master/data/private)、[blackmatrix7/ios_rule_script/Lan](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/Lan)(仅域名)和 [TrackersList](https://github.com/XIU2/TrackersListCollection/blob/master/all.txt)(仅域名)组合,并添加主流 [Dashboard 在线面板](https://github.com/DustinWin/proxy-tools/releases/tag/Dashboard)域名(`yacd.haishan.me`、`yacd.metacubex.one`、`d.metacubex.one`、`metacubex.github.io`、`metacubexd.pages.dev` 和 `board.zash.run.place`) +⑥ **`ads.list`** 源采用 [privacy-protection-tools/anti-AD](https://github.com/privacy-protection-tools/anti-AD) +⑦ 
**`microsoft-cn.list`** 源采用 [v2fly/domain-list-community/microsoft@cn](https://github.com/v2fly/domain-list-community/blob/master/data/microsoft) +⑧ **`apple-cn.list`** 源采用 [v2fly/domain-list-community/apple@cn](https://github.com/v2fly/domain-list-community/blob/master/data/apple) +⑨ **`google-cn.list`** 源采用 [v2fly/domain-list-community/google@cn](https://github.com/v2fly/domain-list-community/blob/master/data/google) +⑩ **`games-cn.list`** 源采用 [v2fly/domain-list-community/category-games@cn](https://github.com/v2fly/domain-list-community/blob/master/data/category-games)、[blackmatrix7/ios_rule_script/SteamCN](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/SteamCN) 和 [blackmatrix7/ios_rule_script/GameDownloadCN](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/Game/GameDownloadCN) 组合 +⑪ **`netflix.list`** 源采用 [blackmatrix7/ios_rule_script/Netflix](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/Netflix)(仅域名) +⑫ **`disney.list`** 源采用 [blackmatrix7/ios_rule_script/Disney](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/Disney) +⑬ **`max.list`** 源采用 [blackmatrix7/ios_rule_script/HBO](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/HBO) +⑭ **`primevideo.list`** 源采用 [blackmatrix7/ios_rule_script/PrimeVideo](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/PrimeVideo) +⑮ **`appletv.list`** 源采用 [blackmatrix7/ios_rule_script/AppleTV](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/AppleTV) +⑯ **`youtube.list`** 源采用 [blackmatrix7/ios_rule_script/YouTube](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/YouTube) +⑰ **`tiktok.list`** 源采用 [blackmatrix7/ios_rule_script/TikTok](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/TikTok) +⑱ **`bilibili.list`** 源采用 [blackmatrix7/ios_rule_script/BiliBili](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/BiliBili) 
+⑲ **`ai.list`** 源采用 [blackmatrix7/ios_rule_script/OpenAI](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/OpenAI)、[blackmatrix7/ios_rule_script/Copilot](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/Copilot)、[blackmatrix7/ios_rule_script/Gemini](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/Gemini) 和 [blackmatrix7/ios_rule_script/Claude](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/Claude) 组合 +⑳ **`networktest.list`** 源采用 [blackmatrix7/ios_rule_script/Speedtest](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/Speedtest) 和 IPv6 测试域名关键字(`keyword`,包括:`ipv6-test`、`test-ipv6`、`ipv6test` 和 `testipv6`)组合 +㉑ **`proxy.list`** 源采用 [v2fly/domain-list-community/geolocation-!cn](https://github.com/v2fly/domain-list-community/blob/master/data/geolocation-!cn)(删除了带有 `@cn` 和 `@ads` 的域名,并新增了 [gfwlist](https://github.com/gfwlist/gfwlist) 和 v2fly/domain-list-community/cn 中带有 `@!cn` 的域名)和 [blackmatrix7/ios_rule_script/Global](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/Global) 组合 +㉒ **`cn.list`** 源采用 [v2fly/domain-list-community/cn](https://github.com/v2fly/domain-list-community/blob/master/data/cn)(删除了带有 `@!cn` 和 `@ads` 的域名,并新增了 v2fly/domain-list-community/geolocation-!cn 中带有 `@cn` 的域名)和 [blackmatrix7/ios_rule_script/ChinaMax](https://github.com/blackmatrix7/ios_rule_script/tree/master/rule/Clash/ChinaMax)(仅域名)组合 diff --git a/common.go b/common.go new file mode 100644 index 00000000..dff2c13e --- /dev/null +++ b/common.go @@ -0,0 +1,113 @@ +package main + +import ( + "fmt" + "go/build" + "os" + "path/filepath" + "strings" +) + +type fileName string + +type attribute string + +// GetDataDir returns the path to the "data" directory used to generate lists. +// Usage order: +// 1. The datapath that user set when running the program +// 2. The default path "./data" (data directory in the current working directory) if exists +// 3. 
The path to the data directory of project `v2fly/domain-list-community` in GOPATH mode +func GetDataDir() string { + if *dataPath != "" { // Use dataPath option if set by user + fmt.Printf("Use domain list files in '%s' directory.\n", *dataPath) + return *dataPath + } + + defaultDataDir := filepath.Join("./", "data") + if _, err := os.Stat(defaultDataDir); !os.IsNotExist(err) { // Use "./data" directory if exists + fmt.Printf("Use domain list files in '%s' directory.\n", defaultDataDir) + return defaultDataDir + } + + return filepath.Join(GetGOPATH(), "src", "github.com", "v2fly", "domain-list-community", "data") +} + +// envFile returns the name of the Go environment configuration file. +// Copy from https://github.com/golang/go/blob/c4f2a9788a7be04daf931ac54382fbe2cb754938/src/cmd/go/internal/cfg/cfg.go#L150-L166 +func envFile() (string, error) { + if file := os.Getenv("GOENV"); file != "" { + if file == "off" { + return "", fmt.Errorf("GOENV=off") + } + return file, nil + } + dir, err := os.UserConfigDir() + if err != nil { + return "", err + } + if dir == "" { + return "", fmt.Errorf("missing user-config dir") + } + return filepath.Join(dir, "go", "env"), nil +} + +// GetRuntimeEnv returns the value of runtime environment variable, +// that is set by running following command: `go env -w key=value`. 
+func GetRuntimeEnv(key string) (string, error) { + file, err := envFile() + if err != nil { + return "", err + } + if file == "" { + return "", fmt.Errorf("missing runtime env file") + } + var data []byte + var runtimeEnv string + data, readErr := os.ReadFile(file) + if readErr != nil { + return "", readErr + } + envStrings := strings.Split(string(data), "\n") + for _, envItem := range envStrings { + envItem = strings.TrimSuffix(envItem, "\r") + envKeyValue := strings.Split(envItem, "=") + if strings.EqualFold(strings.TrimSpace(envKeyValue[0]), key) { + runtimeEnv = strings.TrimSpace(envKeyValue[1]) + } + } + return runtimeEnv, nil +} + +// GetGOPATH returns GOPATH environment variable as a string. It will NOT be empty. +func GetGOPATH() string { + // The one set by user explicitly by `export GOPATH=/path` or `env GOPATH=/path command` + GOPATH := os.Getenv("GOPATH") + if GOPATH == "" { + var err error + // The one set by user by running `go env -w GOPATH=/path` + GOPATH, err = GetRuntimeEnv("GOPATH") + if err != nil { + // The default one that Golang uses + return build.Default.GOPATH + } + if GOPATH == "" { + return build.Default.GOPATH + } + return GOPATH + } + return GOPATH +} + +// isEmpty checks if the rule that has been trimmed out spaces is empty +func isEmpty(s string) bool { + return len(strings.TrimSpace(s)) == 0 +} + +// removeComment removes comments in the rule +func removeComment(line string) string { + idx := strings.Index(line, "#") + if idx == -1 { + return line + } + return strings.TrimSpace(line[:idx]) +} diff --git a/go.mod b/go.mod new file mode 100644 index 00000000..feaf7b08 --- /dev/null +++ b/go.mod @@ -0,0 +1,14 @@ +module domain-list-custom + +go 1.23.4 + +require ( + github.com/v2fly/v2ray-core/v5 v5.23.0 + google.golang.org/protobuf v1.36.1 +) + +require ( + github.com/adrg/xdg v0.5.3 // indirect + github.com/golang/protobuf v1.5.4 // indirect + golang.org/x/sys v0.28.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 
index 00000000..99591ef2 --- /dev/null +++ b/go.sum @@ -0,0 +1,20 @@ +github.com/adrg/xdg v0.5.3 h1:xRnxJXne7+oWDatRhR1JLnvuccuIeCoBu2rtuLqQB78= +github.com/adrg/xdg v0.5.3/go.mod h1:nlTsY+NNiCBGCK2tpm09vRqfVzrc2fLmXGpBLF0zlTQ= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/v2fly/v2ray-core/v5 v5.23.0 h1:VJ72oUaLS6nKmSA9M6qxcewNdU+2zZrBMZZOMGBpi/A= +github.com/v2fly/v2ray-core/v5 v5.23.0/go.mod h1:+fUnQNUcYE21wFWq4LW9LPffq3u3+OliiFQLAYeh+eU= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +google.golang.org/protobuf v1.36.1 h1:yBPeRvTftaleIgM3PZ/WBIZ7XM/eEYAaEyCwvyjq/gk= +google.golang.org/protobuf v1.36.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/listinfo.go b/listinfo.go new file mode 100644 index 00000000..9a0f58a1 --- /dev/null +++ b/listinfo.go @@ -0,0 +1,362 @@ +package main + +import ( + "bufio" + "errors" + "fmt" + "os" + "sort" + "strings" + "time" + + 
router "github.com/v2fly/v2ray-core/v5/app/router/routercommon" +) + +// ListInfo is the information structure of a single file in data directory. +// It includes all types of rules of the file, as well as servel types of +// sturctures of same items for convenience in later process. +type ListInfo struct { + Name fileName + HasInclusion bool + InclusionAttributeMap map[fileName][]attribute + FullTypeList []*router.Domain + KeywordTypeList []*router.Domain + RegexpTypeList []*router.Domain + AttributeRuleUniqueList []*router.Domain + DomainTypeList []*router.Domain + DomainTypeUniqueList []*router.Domain + AttributeRuleListMap map[attribute][]*router.Domain + GeoSite *router.GeoSite +} + +// NewListInfo return a ListInfo +func NewListInfo() *ListInfo { + return &ListInfo{ + InclusionAttributeMap: make(map[fileName][]attribute), + FullTypeList: make([]*router.Domain, 0, 10), + KeywordTypeList: make([]*router.Domain, 0, 10), + RegexpTypeList: make([]*router.Domain, 0, 10), + AttributeRuleUniqueList: make([]*router.Domain, 0, 10), + DomainTypeList: make([]*router.Domain, 0, 10), + DomainTypeUniqueList: make([]*router.Domain, 0, 10), + AttributeRuleListMap: make(map[attribute][]*router.Domain), + } +} + +// ProcessList processes each line of every single file in the data directory +// and generates a ListInfo of each file. 
+func (l *ListInfo) ProcessList(file *os.File) error { + scanner := bufio.NewScanner(file) + // Parse a file line by line to generate ListInfo + for scanner.Scan() { + line := scanner.Text() + if isEmpty(line) { + continue + } + line = removeComment(line) + if isEmpty(line) { + continue + } + parsedRule, err := l.parseRule(line) + if err != nil { + return err + } + if parsedRule == nil { + continue + } + l.classifyRule(parsedRule) + } + if err := scanner.Err(); err != nil { + return err + } + + return nil +} + +// parseRule parses a single rule +func (l *ListInfo) parseRule(line string) (*router.Domain, error) { + line = strings.TrimSpace(line) + + if line == "" { + return nil, errors.New("empty line") + } + + // Parse `include` rule first, eg: `include:google`, `include:google @cn @gfw` + if strings.HasPrefix(line, "include:") { + l.parseInclusion(line) + return nil, nil + } + + parts := strings.Split(line, " ") + ruleWithType := strings.TrimSpace(parts[0]) + if ruleWithType == "" { + return nil, errors.New("empty rule") + } + + var rule router.Domain + if err := l.parseTypeRule(ruleWithType, &rule); err != nil { + return nil, err + } + + for _, attrString := range parts[1:] { + if attrString = strings.TrimSpace(attrString); attrString != "" { + attr, err := l.parseAttribute(attrString) + if err != nil { + return nil, err + } + rule.Attribute = append(rule.Attribute, attr) + } + } + + return &rule, nil +} + +func (l *ListInfo) parseInclusion(inclusion string) { + inclusionVal := strings.TrimPrefix(strings.TrimSpace(inclusion), "include:") + l.HasInclusion = true + inclusionValSlice := strings.Split(inclusionVal, "@") + filename := fileName(strings.ToUpper(strings.TrimSpace(inclusionValSlice[0]))) + switch len(inclusionValSlice) { + case 1: // Inclusion without attribute + // Use '@' as the placeholder attribute for 'include:filename' + l.InclusionAttributeMap[filename] = append(l.InclusionAttributeMap[filename], attribute("@")) + default: // Inclusion with 
attribute(s) + // support new inclusion syntax, eg: `include:google @cn @gfw` + for _, attr := range inclusionValSlice[1:] { + attr = strings.ToLower(strings.TrimSpace(attr)) + if attr != "" { + // Added in this format: '@cn' + l.InclusionAttributeMap[filename] = append(l.InclusionAttributeMap[filename], attribute("@"+attr)) + } + } + } +} + +func (l *ListInfo) parseTypeRule(domain string, rule *router.Domain) error { + kv := strings.Split(domain, ":") + switch len(kv) { + case 1: // line without type prefix + rule.Type = router.Domain_RootDomain + rule.Value = strings.ToLower(strings.TrimSpace(kv[0])) + case 2: // line with type prefix + ruleType := strings.TrimSpace(kv[0]) + ruleVal := strings.TrimSpace(kv[1]) + rule.Value = strings.ToLower(ruleVal) + switch strings.ToLower(ruleType) { + case "full": + rule.Type = router.Domain_Full + case "domain": + rule.Type = router.Domain_RootDomain + case "keyword": + rule.Type = router.Domain_Plain + case "regexp": + rule.Type = router.Domain_Regex + rule.Value = ruleVal + default: + return errors.New("unknown domain type: " + ruleType) + } + } + return nil +} + +func (l *ListInfo) parseAttribute(attr string) (*router.Domain_Attribute, error) { + if attr[0] != '@' { + return nil, errors.New("invalid attribute: " + attr) + } + attr = attr[1:] // Trim out attribute prefix `@` character + + var attribute router.Domain_Attribute + attribute.Key = strings.ToLower(attr) + attribute.TypedValue = &router.Domain_Attribute_BoolValue{BoolValue: true} + return &attribute, nil +} + +// classifyRule classifies a single rule and write into *ListInfo +func (l *ListInfo) classifyRule(rule *router.Domain) { + if len(rule.Attribute) > 0 { + l.AttributeRuleUniqueList = append(l.AttributeRuleUniqueList, rule) + var attrsString attribute + for _, attr := range rule.Attribute { + attrsString += attribute("@" + attr.GetKey()) // attrsString will be "@cn@ads" if there are more than one attributes + } + l.AttributeRuleListMap[attrsString] = 
append(l.AttributeRuleListMap[attrsString], rule) + } else { + switch rule.Type { + case router.Domain_Full: + l.FullTypeList = append(l.FullTypeList, rule) + case router.Domain_RootDomain: + l.DomainTypeList = append(l.DomainTypeList, rule) + case router.Domain_Plain: + l.KeywordTypeList = append(l.KeywordTypeList, rule) + case router.Domain_Regex: + l.RegexpTypeList = append(l.RegexpTypeList, rule) + } + } +} + +// Flatten flattens the rules in a file that have "include" syntax +// in data directory, and adds those need-to-included rules into it. +// This feature supports the "include:filename@attribute" syntax. +// It also generates a domain trie of domain-typed rules for each file +// to remove duplications of them. +func (l *ListInfo) Flatten(lm *ListInfoMap) error { + if l.HasInclusion { + for filename, attrs := range l.InclusionAttributeMap { + for _, attrWanted := range attrs { + includedList := (*lm)[filename] + switch string(attrWanted) { + case "@": + l.FullTypeList = append(l.FullTypeList, includedList.FullTypeList...) + l.DomainTypeList = append(l.DomainTypeList, includedList.DomainTypeList...) + l.KeywordTypeList = append(l.KeywordTypeList, includedList.KeywordTypeList...) + l.RegexpTypeList = append(l.RegexpTypeList, includedList.RegexpTypeList...) + l.AttributeRuleUniqueList = append(l.AttributeRuleUniqueList, includedList.AttributeRuleUniqueList...) + for attr, domainList := range includedList.AttributeRuleListMap { + l.AttributeRuleListMap[attr] = append(l.AttributeRuleListMap[attr], domainList...) + } + + default: + for attr, domainList := range includedList.AttributeRuleListMap { + // If there are more than one attribute attached to the rule, + // the attribute key of AttributeRuleListMap in ListInfo + // will be like: "@cn@ads". + // So if to extract rules with a specific attribute, it is necessary + // also to test the multi-attribute keys of AttributeRuleListMap. 
+ // Notice: if "include:google @cn" and "include:google @ads" appear + // at the same time in the parent list. There are chances that the same + // rule with that two attributes(`@cn` and `@ads`) will be included twice in the parent list. + if strings.Contains(string(attr)+"@", string(attrWanted)+"@") { + l.AttributeRuleListMap[attr] = append(l.AttributeRuleListMap[attr], domainList...) + l.AttributeRuleUniqueList = append(l.AttributeRuleUniqueList, domainList...) + } + } + } + } + } + } + + sort.Slice(l.DomainTypeList, func(i, j int) bool { + return len(strings.Split(l.DomainTypeList[i].GetValue(), ".")) < len(strings.Split(l.DomainTypeList[j].GetValue(), ".")) + }) + + trie := NewDomainTrie() + for _, domain := range l.DomainTypeList { + success, err := trie.Insert(domain.GetValue()) + if err != nil { + return err + } + if success { + l.DomainTypeUniqueList = append(l.DomainTypeUniqueList, domain) + } + } + + return nil +} + +// ToGeoSite converts every ListInfo into a router.GeoSite structure. +// It also excludes rules with certain attributes in certain files that +// user specified in command line when runing the program. +func (l *ListInfo) ToGeoSite(excludeAttrs map[fileName]map[attribute]bool) { + geosite := new(router.GeoSite) + geosite.CountryCode = string(l.Name) + geosite.Domain = append(geosite.Domain, l.FullTypeList...) + geosite.Domain = append(geosite.Domain, l.DomainTypeUniqueList...) + geosite.Domain = append(geosite.Domain, l.RegexpTypeList...) 
+ + for _, keywordRule := range l.KeywordTypeList { + if len(strings.TrimSpace(keywordRule.GetValue())) > 0 { + geosite.Domain = append(geosite.Domain, keywordRule) + } + } + + if excludeAttrs != nil && excludeAttrs[l.Name] != nil { + excludeAttrsMap := excludeAttrs[l.Name] + for _, domain := range l.AttributeRuleUniqueList { + ifKeep := true + for _, attr := range domain.GetAttribute() { + if excludeAttrsMap[attribute(attr.GetKey())] { + ifKeep = false + break + } + } + if ifKeep { + geosite.Domain = append(geosite.Domain, domain) + } + } + } else { + geosite.Domain = append(geosite.Domain, l.AttributeRuleUniqueList...) + } + l.GeoSite = geosite +} + +// ToPlainText convert router.GeoSite structure to plaintext format. +func (l *ListInfo) ToPlainText() []byte { + plaintextBytes := make([]byte, 0, 1024*512) + + for _, rule := range l.GeoSite.Domain { + ruleVal := strings.TrimSpace(rule.GetValue()) + if len(ruleVal) == 0 { + continue + } + + var ruleString string + switch rule.Type { + case router.Domain_Full: + ruleString = "full:" + ruleVal + case router.Domain_RootDomain: + ruleString = "domain:" + ruleVal + case router.Domain_Plain: + ruleString = "keyword:" + ruleVal + case router.Domain_Regex: + ruleString = "regexp:" + ruleVal + } + + if len(rule.Attribute) > 0 { + ruleString += ":" + for _, attr := range rule.Attribute { + ruleString += "@" + attr.GetKey() + "," + } + ruleString = strings.TrimRight(ruleString, ",") + } + // Output format is: type:domain.tld:@attr1,@attr2 + plaintextBytes = append(plaintextBytes, []byte(ruleString+"\n")...) + } + + return plaintextBytes +} + +// ToGFWList converts router.GeoSite to GFWList format. +func (l *ListInfo) ToGFWList() []byte { + loc, _ := time.LoadLocation("Asia/Shanghai") + timeString := fmt.Sprintf("! Last Modified: %s\n", time.Now().In(loc).Format(time.RFC1123)) + + gfwlistBytes := make([]byte, 0, 1024*512) + gfwlistBytes = append(gfwlistBytes, []byte("[AutoProxy 0.2.9]\n")...) 
+ gfwlistBytes = append(gfwlistBytes, []byte(timeString)...) + gfwlistBytes = append(gfwlistBytes, []byte("! Expires: 24h\n")...) + gfwlistBytes = append(gfwlistBytes, []byte("! HomePage: https://github.com/Loyalsoldier/domain-list-custom\n")...) + gfwlistBytes = append(gfwlistBytes, []byte("! GitHub URL: https://raw.githubusercontent.com/Loyalsoldier/domain-list-custom/release/gfwlist.txt\n")...) + gfwlistBytes = append(gfwlistBytes, []byte("! jsdelivr URL: https://cdn.jsdelivr.net/gh/Loyalsoldier/domain-list-custom@release/gfwlist.txt\n")...) + gfwlistBytes = append(gfwlistBytes, []byte("\n")...) + + for _, rule := range l.GeoSite.Domain { + ruleVal := strings.TrimSpace(rule.GetValue()) + if len(ruleVal) == 0 { + continue + } + + switch rule.Type { + case router.Domain_Full: + gfwlistBytes = append(gfwlistBytes, []byte("|http://"+ruleVal+"\n")...) + gfwlistBytes = append(gfwlistBytes, []byte("|https://"+ruleVal+"\n")...) + case router.Domain_RootDomain: + gfwlistBytes = append(gfwlistBytes, []byte("||"+ruleVal+"\n")...) + case router.Domain_Plain: + gfwlistBytes = append(gfwlistBytes, []byte(ruleVal+"\n")...) + case router.Domain_Regex: + gfwlistBytes = append(gfwlistBytes, []byte("/"+ruleVal+"/\n")...) + } + } + + return gfwlistBytes +} diff --git a/listinfomap.go b/listinfomap.go new file mode 100644 index 00000000..4149c4e4 --- /dev/null +++ b/listinfomap.go @@ -0,0 +1,132 @@ +package main + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + router "github.com/v2fly/v2ray-core/v5/app/router/routercommon" +) + +// ListInfoMap is the map of files in data directory and ListInfo +type ListInfoMap map[fileName]*ListInfo + +// Marshal processes a file in data directory and generates ListInfo for it. 
+func (lm *ListInfoMap) Marshal(path string) error {
+	file, err := os.Open(path)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+
+	list := NewListInfo()
+	listName := fileName(strings.ToUpper(filepath.Base(path)))
+	list.Name = listName
+	if err := list.ProcessList(file); err != nil {
+		return err
+	}
+
+	(*lm)[listName] = list
+	return nil
+}
+
+// FlattenAndGenUniqueDomainList flattens the included lists and
+// generates a domain trie for each file in data directory to
+// make the items of domain type list unique.
+func (lm *ListInfoMap) FlattenAndGenUniqueDomainList() error {
+	inclusionLevel := make([]map[fileName]bool, 0, 20)
+	okayList := make(map[fileName]bool)
+	inclusionLevelAllLength, loopTimes := 0, 0
+
+	for inclusionLevelAllLength < len(*lm) {
+		inclusionMap := make(map[fileName]bool)
+
+		if loopTimes == 0 {
+			for _, listinfo := range *lm {
+				if listinfo.HasInclusion {
+					continue
+				}
+				inclusionMap[listinfo.Name] = true
+			}
+		} else {
+			for _, listinfo := range *lm {
+				if !listinfo.HasInclusion || okayList[listinfo.Name] {
+					continue
+				}
+
+				var passTimes int
+				for filename := range listinfo.InclusionAttributeMap {
+					if !okayList[filename] {
+						break
+					}
+					passTimes++
+				}
+				if passTimes == len(listinfo.InclusionAttributeMap) {
+					inclusionMap[listinfo.Name] = true
+				}
+			}
+		}
+
+		for filename := range inclusionMap {
+			okayList[filename] = true
+		}
+
+		inclusionLevel = append(inclusionLevel, inclusionMap)
+		inclusionLevelAllLength += len(inclusionMap)
+		loopTimes++
+	}
+
+	for idx, inclusionMap := range inclusionLevel {
+		fmt.Printf("Level %d:\n", idx+1)
+		fmt.Println(inclusionMap)
+		fmt.Println()
+
+		for inclusionFilename := range inclusionMap {
+			if err := (*lm)[inclusionFilename].Flatten(lm); err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
+
+// ToProto generates a router.GeoSite for each file in data directory
+// and returns a router.GeoSiteList
+func (lm *ListInfoMap) ToProto(excludeAttrs map[fileName]map[attribute]bool) *router.GeoSiteList {
+	protoList := new(router.GeoSiteList)
+	for _, listinfo := range *lm {
+		listinfo.ToGeoSite(excludeAttrs)
+		protoList.Entry = append(protoList.Entry, listinfo.GeoSite)
+	}
+	return protoList
+}
+
+// ToPlainText returns a map of exported lists that the user wants
+// and the contents of them in byte format.
+func (lm *ListInfoMap) ToPlainText(exportListsMap []string) (map[string][]byte, error) {
+	filePlainTextBytesMap := make(map[string][]byte)
+	for _, filename := range exportListsMap {
+		if listinfo := (*lm)[fileName(strings.ToUpper(filename))]; listinfo != nil {
+			plaintextBytes := listinfo.ToPlainText()
+			filePlainTextBytesMap[filename] = plaintextBytes
+		} else {
+			fmt.Println("Notice: " + filename + ": no such exported list in the directory, skipped.")
+		}
+	}
+	return filePlainTextBytesMap, nil
+}
+
+// ToGFWList returns the content of the list to be generated into GFWList format
+// that the user wants, in byte format.
+func (lm *ListInfoMap) ToGFWList(togfwlist string) ([]byte, error) {
+	if togfwlist != "" {
+		if listinfo := (*lm)[fileName(strings.ToUpper(togfwlist))]; listinfo != nil {
+			return listinfo.ToGFWList(), nil
+		}
+		return nil, errors.New("no such list: " + togfwlist)
+	}
+	return nil, nil
+}
diff --git a/main.go b/main.go
new file mode 100644
index 00000000..b0dbcf1e
--- /dev/null
+++ b/main.go
@@ -0,0 +1,133 @@
+package main
+
+import (
+	"encoding/base64"
+	"flag"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"google.golang.org/protobuf/proto"
+)
+
+var (
+	dataPath     = flag.String("datapath", filepath.Join("./", "data"), "Path to your custom 'data' directory")
+	datName      = flag.String("datname", "geosite.dat", "Name of the generated dat file")
+	outputPath   = flag.String("outputpath", "./publish", "Output path to the generated files")
+	exportLists  = flag.String("exportlists", "private,microsoft,apple,google,category-games,geolocation-!cn,cn", "Lists to be exported in plaintext format, separated by ',' comma")
+	excludeAttrs = flag.String("excludeattrs", "microsoft@ads,apple@ads,google@ads,category-games@ads,geolocation-!cn@cn@ads,cn@!cn@ads", "Exclude rules with certain attributes in certain lists, separated by ',' comma, support multiple attributes in one list. Example: geolocation-!cn@cn@ads,geolocation-cn@!cn")
+	toGFWList    = flag.String("togfwlist", "geolocation-!cn", "List to be exported in GFWList format")
+)
+
+func main() {
+	flag.Parse()
+
+	dir := GetDataDir()
+	listInfoMap := make(ListInfoMap)
+
+	if err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if info.IsDir() {
+			return nil
+		}
+		if err := listInfoMap.Marshal(path); err != nil {
+			return err
+		}
+		return nil
+	}); err != nil {
+		fmt.Println("Failed:", err)
+		os.Exit(1)
+	}
+
+	if err := listInfoMap.FlattenAndGenUniqueDomainList(); err != nil {
+		fmt.Println("Failed:", err)
+		os.Exit(1)
+	}
+
+	// Process and split *excludeRules
+	excludeAttrsInFile := make(map[fileName]map[attribute]bool)
+	if *excludeAttrs != "" {
+		exFilenameAttrSlice := strings.Split(*excludeAttrs, ",")
+		for _, exFilenameAttr := range exFilenameAttrSlice {
+			exFilenameAttr = strings.TrimSpace(exFilenameAttr)
+			exFilenameAttrMap := strings.Split(exFilenameAttr, "@")
+			filename := fileName(strings.ToUpper(strings.TrimSpace(exFilenameAttrMap[0])))
+			excludeAttrsInFile[filename] = make(map[attribute]bool)
+			for _, attr := range exFilenameAttrMap[1:] {
+				attr = strings.TrimSpace(attr)
+				if len(attr) > 0 {
+					excludeAttrsInFile[filename][attribute(attr)] = true
+				}
+			}
+		}
+	}
+
+	// Process and split *exportLists
+	var exportListsSlice []string
+	if *exportLists != "" {
+		tempSlice := strings.Split(*exportLists, ",")
+		for _, exportList := range tempSlice {
+			exportList = strings.TrimSpace(exportList)
+			if len(exportList) > 0 {
+				exportListsSlice = append(exportListsSlice, exportList)
+			}
+		}
+	}
+
+	// Generate dlc.dat
+	if geositeList := listInfoMap.ToProto(excludeAttrsInFile); geositeList != nil {
+		protoBytes, err := proto.Marshal(geositeList)
+		if err != nil {
+			fmt.Println("Failed:", err)
+			os.Exit(1)
+		}
+		if err := os.MkdirAll(*outputPath, 0755); err != nil {
+			fmt.Println("Failed:", err)
+			os.Exit(1)
+		}
+		if err := os.WriteFile(filepath.Join(*outputPath, *datName), protoBytes, 0644); err != nil {
+			fmt.Println("Failed:", err)
+			os.Exit(1)
+		} else {
+			fmt.Printf("%s has been generated successfully in '%s'.\n", *datName, *outputPath)
+		}
+	}
+
+	// Generate plaintext list files
+	if filePlainTextBytesMap, err := listInfoMap.ToPlainText(exportListsSlice); err == nil {
+		for filename, plaintextBytes := range filePlainTextBytesMap {
+			filename += ".txt"
+			if err := os.WriteFile(filepath.Join(*outputPath, filename), plaintextBytes, 0644); err != nil {
+				fmt.Println("Failed:", err)
+				os.Exit(1)
+			} else {
+				fmt.Printf("%s has been generated successfully in '%s'.\n", filename, *outputPath)
+			}
+		}
+	} else {
+		fmt.Println("Failed:", err)
+		os.Exit(1)
+	}
+
+	// Generate gfwlist.txt
+	if gfwlistBytes, err := listInfoMap.ToGFWList(*toGFWList); err == nil {
+		// O_TRUNC: without it an existing, longer gfwlist.txt would keep
+		// stale trailing bytes from a previous run.
+		if f, err := os.OpenFile(filepath.Join(*outputPath, "gfwlist.txt"), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); err != nil {
+			fmt.Println("Failed:", err)
+			os.Exit(1)
+		} else {
+			encoder := base64.NewEncoder(base64.StdEncoding, f)
+			defer encoder.Close()
+			if _, err := encoder.Write(gfwlistBytes); err != nil {
+				fmt.Println("Failed:", err)
+				os.Exit(1)
+			}
+			fmt.Printf("gfwlist.txt has been generated successfully in '%s'.\n", *outputPath)
+		}
+	} else {
+		fmt.Println("Failed:", err)
+		os.Exit(1)
+	}
+}
diff --git a/trie.go b/trie.go
new file mode 100644
index 00000000..a34748e1
--- /dev/null
+++ b/trie.go
@@ -0,0 +1,73 @@
+package main
+
+import (
+	"errors"
+	"strings"
+)
+
+type node struct {
+	leaf     bool
+	children map[string]*node
+}
+
+func newNode() *node {
+	return &node{
+		leaf:     false,
+		children: make(map[string]*node),
+	}
+}
+
+func (n *node) getChild(s string) *node {
+	return n.children[s]
+}
+
+func (n *node) hasChild(s string) bool {
+	return n.getChild(s) != nil
+}
+
+func (n *node) addChild(s string, child *node) {
+	n.children[s] = child
+}
+
+func (n *node) isLeaf() bool {
+	return n.leaf
+}
+
+// DomainTrie is a domain trie for domain type rules.
+type DomainTrie struct {
+	root *node
+}
+
+// NewDomainTrie creates and returns a new domain trie.
+func NewDomainTrie() *DomainTrie {
+	return &DomainTrie{
+		root: newNode(),
+	}
+}
+
+// Insert inserts a domain rule string into the domain trie
+// and returns whether it was inserted successfully or not.
+func (t *DomainTrie) Insert(domain string) (bool, error) {
+	if domain == "" {
+		return false, errors.New("empty domain")
+	}
+	parts := strings.Split(domain, ".")
+
+	node := t.root
+	for i := len(parts) - 1; i >= 0; i-- {
+		part := parts[i]
+
+		if node.isLeaf() {
+			return false, nil
+		}
+		if !node.hasChild(part) {
+			node.addChild(part, newNode())
+			if i == 0 {
+				node.getChild(part).leaf = true
+				return true, nil
+			}
+		}
+		node = node.getChild(part)
+	}
+	return false, nil
+}