/* SPDX-License-Identifier: GPL-3.0-or-later
 * SPDX-FileCopyrightText: 2018-2019, 2021-2026 Soren Stoutner <soren@stoutner.com>
 *
 * This file is part of Privacy Browser Android <https://www.stoutner.com/privacy-browser-android/>.
 *
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <https://www.gnu.org/licenses/>.
 */
20 package com.stoutner.privacybrowser.helpers
22 import android.content.res.AssetManager
24 import com.stoutner.privacybrowser.dataclasses.FilterList
25 import com.stoutner.privacybrowser.dataclasses.FilterListDataClass
26 import com.stoutner.privacybrowser.dataclasses.FilterListEntryDataClass
27 import com.stoutner.privacybrowser.dataclasses.FilterOptionDisposition
28 import com.stoutner.privacybrowser.dataclasses.Sublist
30 import java.io.BufferedReader
31 import java.io.IOException
32 import java.io.InputStreamReader
34 class ParseFilterListHelper {
/**
 * Parses the filter list asset at [filterListFilePath] into a [FilterListDataClass].
 *
 * Each line is classified in this order:
 * 1. Blank lines, `[` file format headers, and content filtering rules (`##`, `#?#`, `#@#`, `#$#`) are dropped.
 * 2. `!` comments are dropped, except `! Title:` and `! Version:`, which populate the title and version strings.
 * 3. Every other line is parsed as a filter entry and added to one of the allow or block sublists.
 *
 * @param assetManager the asset manager used to open the filter list.
 * @param filterListFilePath the path of the filter list inside the assets.
 * @param filterList the filter list enum value stored in the returned data class and in every parsed entry.
 * @return the populated data class.  An [IOException] while opening or reading the asset is swallowed, in which case whatever was populated before the
 * failure is returned.
 */
fun parseFilterList(assetManager: AssetManager, filterListFilePath: String, filterList: FilterList): FilterListDataClass {
    // Create the filter list data class.
    val filterListDataClass = FilterListDataClass()

    // Parse the filter list.  The `try` is required because opening and reading the asset can throw an `IOException`.
    try {
        // Load the filter list into a buffered reader.
        val bufferedReader = BufferedReader(InputStreamReader(assetManager.open(filterListFilePath)))

        // Populate the filter list enum.
        filterListDataClass.filterList = filterList

        // Parse each line of the filter list.  `forEachLine` closes the reader when it finishes, even if the action throws, so no explicit `close()` is needed.
        bufferedReader.forEachLine { filterListLine ->
            when {
                // Ignore blank lines.
                filterListLine.isBlank() -> Unit

                // Ignore lines that start with `[`, which describe the file format.
                filterListLine.startsWith('[') -> Unit

                // Ignore content filtering rules, which are not implemented.
                filterListLine.contains("##") ||
                        filterListLine.contains("#?#") ||
                        filterListLine.contains("#@#") ||
                        filterListLine.contains("#$#") -> Unit

                // Lines that start with `!` are comments, some of which carry metadata.
                filterListLine.startsWith('!') -> parseFilterListComment(filterListLine, filterListDataClass)

                // Everything else is a filter entry.
                else -> parseFilterListEntry(filterListLine, filterList, filterListDataClass)
            }
        }
    } catch (_: IOException) {
        // Do nothing if the filter list cannot be read.
    }

    // Return the filter list data class.
    return filterListDataClass
}

/** Stores the title or version carried by a `!` comment line in [filterListDataClass]; every other comment is dropped. */
private fun parseFilterListComment(commentString: String, filterListDataClass: FilterListDataClass) {
    // `removePrefix` is used instead of a fixed `substring` index so that a bare `! Title:` or `! Version:` line cannot throw an index exception.
    if (commentString.startsWith("! Title:")) {
        filterListDataClass.titleString = commentString.removePrefix("! Title:").trim()
    } else if (commentString.startsWith("! Version:")) {
        filterListDataClass.versionString = commentString.removePrefix("! Version:").trim()
    }
}

/** Parses one filter entry line and, unless it must be dropped, adds it to the matching sublist of [filterListDataClass]. */
private fun parseFilterListEntry(originalEntryString: String, filterList: FilterList, filterListDataClass: FilterListDataClass) {
    // Create a filter list entry data class and record where the entry came from.
    var filterListEntryDataClass = FilterListEntryDataClass()
    filterListEntryDataClass.filterList = filterList
    filterListEntryDataClass.originalEntryString = originalEntryString

    // The working copy of the entry, which is progressively stripped of options and prefixes.
    var filterListEntryString = originalEntryString

    // Everything after the last dollar sign is treated as the filter options.
    val indexOfLastDollarSign = filterListEntryString.lastIndexOf('$')

    // Process the filter options if they exist.
    if (indexOfLastDollarSign > -1) {
        // Store the original filter options string.
        filterListEntryDataClass.originalFilterOptionsString = filterListEntryString.substring(indexOfLastDollarSign + 1)

        // Keep the entry without the filter options.
        filterListEntryString = filterListEntryString.take(indexOfLastDollarSign)

        // Populate the applied filter options and their dispositions.
        applyFilterOptions(filterListEntryDataClass)
    }

    // There were filter options, but none of them are handled.  Applying such an entry without its options would have unintended consequences.
    val onlyUnsupportedFilterOptions = filterListEntryDataClass.originalFilterOptionsString.isNotEmpty() &&
            (filterListEntryDataClass.domain == FilterOptionDisposition.Null) &&
            (filterListEntryDataClass.thirdParty == FilterOptionDisposition.Null)

    // An empty entry would match all requests, generally or for a specified domain.  Typically these are left over after removing unsupported options.
    // This should check for the presence of request options in the future when they are supported.
    if (onlyUnsupportedFilterOptions || filterListEntryString.isEmpty()) return

    if (filterListEntryString.startsWith("@@")) {  // Process an allow list entry.
        // Remove the initial `@@`.
        filterListEntryString = filterListEntryString.substring(2)

        if (filterListEntryString.startsWith("||")) {  // Process an initial domain allow list entry.
            // `||` anchors the entry to the beginning of the domain.
            filterListEntryDataClass.initialMatch = true

            // Prepare the filter list string without the initial `||`.
            filterListEntryDataClass = prepareFilterListString(filterListEntryString.substring(2), filterListEntryDataClass)

            // Store the sublist and add the entry to it.
            filterListEntryDataClass.sublist = Sublist.InitialDomainAllowList
            filterListDataClass.initialDomainAllowList.add(filterListEntryDataClass)
        } else if (filterListEntryString.contains("\\")) {  // Entries containing a backslash are treated as regular expression allow list entries.
            // Set the regular expression as the applied entry list.
            filterListEntryDataClass.appliedEntryList = listOf(filterListEntryString)

            // Store the sublist and add the entry to it.  This was previously tagged as the block list although the entry is added to the allow list.
            // NOTE(review): assumes the `Sublist` enum declares `RegularExpressionAllowList` alongside the other sublists; confirm.
            filterListEntryDataClass.sublist = Sublist.RegularExpressionAllowList
            filterListDataClass.regularExpressionAllowList.add(filterListEntryDataClass)
        } else {  // Process a main allow list entry.
            // Prepare the filter list string.
            filterListEntryDataClass = prepareFilterListString(filterListEntryString, filterListEntryDataClass)

            // Store the sublist and add the entry to it.
            filterListEntryDataClass.sublist = Sublist.MainAllowList
            filterListDataClass.mainAllowList.add(filterListEntryDataClass)
        }
    } else if (filterListEntryString.startsWith("||")) {  // Process an initial domain block list entry.
        // `||` anchors the entry to the beginning of the domain.
        filterListEntryDataClass.initialMatch = true

        // Prepare the filter list string without the initial `||`.
        filterListEntryDataClass = prepareFilterListString(filterListEntryString.substring(2), filterListEntryDataClass)

        // Store the sublist and add the entry to it.
        filterListEntryDataClass.sublist = Sublist.InitialDomainBlockList
        filterListDataClass.initialDomainBlockList.add(filterListEntryDataClass)
    } else if (filterListEntryString.contains("\\")) {  // Entries containing a backslash are treated as regular expression block list entries.
        // Set the regular expression as the applied entry list.
        filterListEntryDataClass.appliedEntryList = listOf(filterListEntryString)

        // Store the sublist and add the entry to it.
        filterListEntryDataClass.sublist = Sublist.RegularExpressionBlockList
        filterListDataClass.regularExpressionBlockList.add(filterListEntryDataClass)
    } else {  // Process a main block list entry.
        // Prepare the filter list string.
        filterListEntryDataClass = prepareFilterListString(filterListEntryString, filterListEntryDataClass)

        // Store the sublist and add the entry to it.
        filterListEntryDataClass.sublist = Sublist.MainBlockList
        filterListDataClass.mainBlockList.add(filterListEntryDataClass)
    }
}

/** Populates the applied filter options, the domain list, and the dispositions of [filterListEntryDataClass] from its original filter options string. */
private fun applyFilterOptions(filterListEntryDataClass: FilterListEntryDataClass) {
    // Only keep filter options that are handled by Privacy Browser Android.  <https://help.adblockplus.org/hc/en-us/articles/360062733293-How-to-write-filters>
    // Currently these are only `domain` and `third-party`.
    for (filterOptionString in filterListEntryDataClass.originalFilterOptionsString.split(',')) {
        if (filterOptionString.contains("domain=") || filterOptionString.contains("third-party")) {
            filterListEntryDataClass.appliedFilterOptionsList.add(filterOptionString)
        }
    }

    // Populate the filter option entries.
    for (filterOptionString in filterListEntryDataClass.appliedFilterOptionsList) {
        if (filterOptionString.startsWith("domain=")) {  // Domain.
            // Remove `domain=` from the filter option.
            val domainsString = filterOptionString.substring(7)

            if (domainsString.startsWith('~')) {  // Override domains.
                // Populate the domain filter disposition.
                filterListEntryDataClass.domain = FilterOptionDisposition.Override

                // Remove the `~` from each domain.  `replace` returns a new string, so its result must be used; previously it was discarded and the `~`
                // characters were stored in the domain list.
                filterListEntryDataClass.domainList = domainsString.replace("~", "").split('|')
            } else {  // Standard domains.
                // Populate the domain filter disposition.
                filterListEntryDataClass.domain = FilterOptionDisposition.Apply

                // Store the domain list.
                filterListEntryDataClass.domainList = domainsString.split('|')
            }
        } else if (filterOptionString == "third-party") {  // Third-party.
            filterListEntryDataClass.thirdParty = FilterOptionDisposition.Apply
        } else if (filterOptionString == "~third-party") {  // Third-party override.
            filterListEntryDataClass.thirdParty = FilterOptionDisposition.Override
        }
    }
}
/**
 * Converts a filter list entry string into the applied entry list stored on [filterListEntryDataClass].
 *
 * A leading `|` sets the initial match flag and a trailing `|` sets the final match flag; both are stripped.  One leading and one trailing `*` are
 * then removed, and the remainder is split on `*` into the applied entry list.
 *
 * @param filterListEntryString the entry string with any options and list prefixes already removed by the caller.
 * @param filterListEntryDataClass the entry data class to populate; it is modified in place.
 * @return the same [filterListEntryDataClass] instance that was passed in.
 */
private fun prepareFilterListString(filterListEntryString: String, filterListEntryDataClass: FilterListEntryDataClass): FilterListEntryDataClass {
    // Detect and strip the initial `|` anchor.
    val anchoredAtStart = filterListEntryString.startsWith('|')
    val withoutStartAnchor = filterListEntryString.removePrefix("|")

    // Detect and strip the final `|` anchor.  This is checked after the initial anchor is removed, so a lone `|` only counts as an initial anchor.
    val anchoredAtEnd = withoutStartAnchor.endsWith('|')
    val withoutAnchors = withoutStartAnchor.removeSuffix("|")

    // Remove one initial and one final asterisk if they exist.
    val withoutOuterAsterisks = withoutAnchors.removePrefix("*").removeSuffix("*")

    // Populate and return the filter list entry data class.
    return filterListEntryDataClass.apply {
        // The match flags are only ever raised here, never cleared, because the caller may already have set them.
        if (anchoredAtStart) initialMatch = true
        if (anchoredAtEnd) finalMatch = true

        // Split the remaining string on the interior asterisks.
        appliedEntryList = withoutOuterAsterisks.split('*')

        // Store the size of the applied entry list.
        sizeOfAppliedEntryList = appliedEntryList.size

        // Splitting an empty string still yields one (empty) element, so an empty entry also counts as a single applied entry.
        singleAppliedEntry = (sizeOfAppliedEntryList == 1)
    }
}