]> gitweb.stoutner.com Git - PrivacyBrowserAndroid.git/blob - app/src/main/java/com/stoutner/privacybrowser/helpers/ParseFilterListHelper.kt
Release 3.20.
[PrivacyBrowserAndroid.git] / app / src / main / java / com / stoutner / privacybrowser / helpers / ParseFilterListHelper.kt
1 /* SPDX-License-Identifier: GPL-3.0-or-later
2  * SPDX-FileCopyrightText: 2018-2019, 2021-2026 Soren Stoutner <soren@stoutner.com>
3  *
4  * This file is part of Privacy Browser Android <https://www.stoutner.com/privacy-browser-android/>.
5  *
6  * This program is free software: you can redistribute it and/or modify it under
7  * the terms of the GNU General Public License as published by the Free Software
8  * Foundation, either version 3 of the License, or (at your option) any later
9  * version.
10  *
11  * This program is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
14  * details.
15  *
16  * You should have received a copy of the GNU General Public License along with
17  * this program.  If not, see <https://www.gnu.org/licenses/>.
18  */
19
20 package com.stoutner.privacybrowser.helpers
21
22 import android.content.res.AssetManager
23
24 import com.stoutner.privacybrowser.dataclasses.FilterList
25 import com.stoutner.privacybrowser.dataclasses.FilterListDataClass
26 import com.stoutner.privacybrowser.dataclasses.FilterListEntryDataClass
27 import com.stoutner.privacybrowser.dataclasses.FilterOptionDisposition
28 import com.stoutner.privacybrowser.dataclasses.Sublist
29
30 import java.io.BufferedReader
31 import java.io.IOException
32 import java.io.InputStreamReader
33
/**
 * Parses an Adblock Plus style filter list stored in the app assets into a [FilterListDataClass].
 *
 * Each usable line becomes a [FilterListEntryDataClass] that is added to one of the sublists of the filter list data class.
 */
class ParseFilterListHelper {
    /**
     * Read the filter list at [filterListFilePath] and sort its entries into the sublists of a new [FilterListDataClass].
     *
     * Blank lines, lines starting with `[`, content filtering lines, and comments are skipped, except that
     * `! Title:` and `! Version:` comments populate the title and version strings.
     *
     * @param assetManager the asset manager used to open the filter list.
     * @param filterListFilePath the path of the filter list, as passed to [AssetManager.open].
     * @param filterList the filter list enum stored in the returned data class and in every entry.
     * @return the populated filter list data class.  If an [IOException] is thrown while opening or reading the file,
     * whatever was populated before the failure is returned.
     */
    fun parseFilterList(assetManager: AssetManager, filterListFilePath: String, filterList: FilterList): FilterListDataClass {
        // Create the filter list data class.
        val filterListDataClass = FilterListDataClass()

        // Parse the filter list.  The `try` is required by input stream reader.
        try {
            // Load the filter list into a buffered reader.
            val bufferedReader = BufferedReader(InputStreamReader(assetManager.open(filterListFilePath)))

            // Populate the filter list enum.
            filterListDataClass.filterList = filterList

            // Parse each line of the filter list.  `forEachLine` closes the reader when it completes (even if the lambda throws), so no explicit `close()` is needed.
            bufferedReader.forEachLine { originalEntryString ->
                parseLine(originalEntryString, filterList, filterListDataClass)
            }
        } catch (_: IOException) {
            // Do nothing if the filter list cannot be read.
        }

        // Return the filter list data class.
        return filterListDataClass
    }

    /**
     * Classify one line of the filter list and hand it to the matching parser.  Lines that are not used are silently dropped.
     */
    private fun parseLine(originalEntryString: String, filterList: FilterList, filterListDataClass: FilterListDataClass) {
        when {
            // Ignore blank lines.
            originalEntryString.isBlank() -> return

            // The line starts with `[`, which is the file format.
            originalEntryString.startsWith('[') -> return

            // The line contains unimplemented content filtering.
            CONTENT_FILTERING_MARKERS.any { originalEntryString.contains(it) } -> return

            // The line starts with `!`, which are comments.
            originalEntryString.startsWith('!') -> parseComment(originalEntryString, filterListDataClass)

            // Process the entry.
            else -> parseEntry(originalEntryString, filterList, filterListDataClass)
        }
    }

    /**
     * Extract the title or version from a comment line.  All other comments are ignored.
     */
    private fun parseComment(commentString: String, filterListDataClass: FilterListDataClass) {
        // `drop(1)` removes the single separator character after the colon.  Unlike a fixed-index `substring`, it does not throw if nothing follows the colon.
        when {
            // The line contains the title.
            commentString.startsWith(TITLE_PREFIX) -> filterListDataClass.titleString = commentString.removePrefix(TITLE_PREFIX).drop(1)

            // The line contains the version.
            commentString.startsWith(VERSION_PREFIX) -> filterListDataClass.versionString = commentString.removePrefix(VERSION_PREFIX).drop(1)
        }
    }

    /**
     * Parse a filter entry (anything that is not blank, a file format line, content filtering, or a comment) and add it to the appropriate sublist.
     */
    private fun parseEntry(originalEntryString: String, filterList: FilterList, filterListDataClass: FilterListDataClass) {
        // Create a filter list entry data class.
        val filterListEntryDataClass = FilterListEntryDataClass()

        // Store the filter list name in the entry data class.
        filterListEntryDataClass.filterList = filterList

        // Store the original filter list entry.
        filterListEntryDataClass.originalEntryString = originalEntryString

        // Create the working copy of the entry, which is trimmed as it is processed.
        var filterListEntryString = originalEntryString

        // Get the index of the last dollar sign.
        val indexOfLastDollarSign = filterListEntryString.lastIndexOf('$')

        // Process the filter options if they exist.
        if (indexOfLastDollarSign > -1) {
            // Store the original filter options string.
            filterListEntryDataClass.originalFilterOptionsString = filterListEntryString.substring(indexOfLastDollarSign + 1)

            // Store the entry without the filter options as the filter list string.
            filterListEntryString = filterListEntryString.take(indexOfLastDollarSign)

            // Populate the applied filter options and their dispositions.
            applyFilterOptions(filterListEntryDataClass)
        }

        // There were filter options, but they have all been removed because they don't apply to Privacy Browser.
        val onlyUnsupportedFilterOptions = filterListEntryDataClass.originalFilterOptionsString.isNotEmpty() &&
            (filterListEntryDataClass.domain == FilterOptionDisposition.Null) &&
            (filterListEntryDataClass.thirdParty == FilterOptionDisposition.Null)

        // Process the base entry.
        when {
            // Ignore these entries as they will have unintended consequences.
            onlyUnsupportedFilterOptions -> return

            // There are no applied entries.  This should check for the presence of request options in the future when they are supported in Privacy Browser Android.
            // Ignore these entries as they will block all requests generally or for a specified domain.  Typically these are left over after removing `csp=` filter options.
            filterListEntryString.isEmpty() -> return

            // Process an allow list entry.
            filterListEntryString.startsWith("@@") -> {
                // Remove the initial `@@`.
                val allowListEntryString = filterListEntryString.substring(2)

                when {
                    // Process an initial domain allow list entry.
                    allowListEntryString.startsWith("||") -> {
                        // Set the initial match flag.
                        filterListEntryDataClass.initialMatch = true

                        // Prepare the filter list string with the initial `||` removed.
                        prepareFilterListString(allowListEntryString.substring(2), filterListEntryDataClass)

                        // Store the sublist.
                        filterListEntryDataClass.sublist = Sublist.InitialDomainAllowList

                        // Add the entry data class to the initial domain allow list.
                        filterListDataClass.initialDomainAllowList.add(filterListEntryDataClass)
                    }

                    // Process a regular expression allow list entry.
                    allowListEntryString.contains("\\") -> {
                        // Set the regular expression as the applied entry list.
                        filterListEntryDataClass.appliedEntryList = listOf(allowListEntryString)

                        // Store the sublist.
                        // NOTE(review): this tags an entry that is added to the regular expression *allow* list as `RegularExpressionBlockList`, which looks like a copy-paste slip.
                        // Presumably `Sublist` has a matching allow list constant; confirm against the enum and switch to it.
                        filterListEntryDataClass.sublist = Sublist.RegularExpressionBlockList

                        // Add the entry data class to the regular expression allow list.
                        filterListDataClass.regularExpressionAllowList.add(filterListEntryDataClass)
                    }

                    // Process a main allow list entry.
                    else -> {
                        // Prepare the filter list string.
                        prepareFilterListString(allowListEntryString, filterListEntryDataClass)

                        // Store the sublist.
                        filterListEntryDataClass.sublist = Sublist.MainAllowList

                        // Add the entry data class to the main allow list.
                        filterListDataClass.mainAllowList.add(filterListEntryDataClass)
                    }
                }
            }

            // Process an initial domain block list entry.
            filterListEntryString.startsWith("||") -> {
                // Set the initial match flag.
                filterListEntryDataClass.initialMatch = true

                // Prepare the filter list string with the initial `||` removed.
                prepareFilterListString(filterListEntryString.substring(2), filterListEntryDataClass)

                // Store the sublist.
                filterListEntryDataClass.sublist = Sublist.InitialDomainBlockList

                // Add the entry data class to the initial domain block list.
                filterListDataClass.initialDomainBlockList.add(filterListEntryDataClass)
            }

            // Process a regular expression block list entry.
            filterListEntryString.contains("\\") -> {
                // Set the regular expression as the applied entry list.
                filterListEntryDataClass.appliedEntryList = listOf(filterListEntryString)

                // Store the sublist.
                filterListEntryDataClass.sublist = Sublist.RegularExpressionBlockList

                // Add the entry data class to the regular expression block list.
                filterListDataClass.regularExpressionBlockList.add(filterListEntryDataClass)
            }

            // Process a main block list entry.
            else -> {
                // Prepare the filter list string.
                prepareFilterListString(filterListEntryString, filterListEntryDataClass)

                // Store the sublist.
                filterListEntryDataClass.sublist = Sublist.MainBlockList

                // Add the entry to the main block list.
                filterListDataClass.mainBlockList.add(filterListEntryDataClass)
            }
        }
    }

    /**
     * Populate the applied filter options list, the domain list, and the domain and third-party dispositions
     * from the original filter options string already stored in [filterListEntryDataClass].
     */
    private fun applyFilterOptions(filterListEntryDataClass: FilterListEntryDataClass) {
        // Split the options list and populate the applied filter options list.
        for (filterOptionString in filterListEntryDataClass.originalFilterOptionsString.split(',')) {
            // Only add filter options that are handled by Privacy Browser Android.  <https://help.adblockplus.org/hc/en-us/articles/360062733293-How-to-write-filters>
            // Currently these are only `domain` and `third-party`.
            if (filterOptionString.contains("domain=") || filterOptionString.contains("third-party")) {
                // Add the filter option to the applied filter options list.
                filterListEntryDataClass.appliedFilterOptionsList.add(filterOptionString)
            }
        }

        // Populate the filter option entries.
        for (filterOptionString in filterListEntryDataClass.appliedFilterOptionsList) {
            when {
                // Domain.
                filterOptionString.startsWith("domain=") -> {
                    // Remove `domain=` from the filter option.
                    val domainsString = filterOptionString.substring(7)

                    // Set the disposition according to the domain type and store the domain list.
                    if (domainsString.startsWith('~')) {  // Override domains.
                        // Populate the domain filter disposition.
                        filterListEntryDataClass.domain = FilterOptionDisposition.Override

                        // Remove the `~` from each domain.  Strings are immutable, so the result of `replace` must be used.
                        filterListEntryDataClass.domainList = domainsString.replace("~", "").split('|')
                    } else {  // Standard domains.
                        // Populate the domain filter disposition.
                        filterListEntryDataClass.domain = FilterOptionDisposition.Apply

                        // Store the domain list.
                        filterListEntryDataClass.domainList = domainsString.split('|')
                    }
                }

                // Third-party.
                filterOptionString == "third-party" -> filterListEntryDataClass.thirdParty = FilterOptionDisposition.Apply

                // Third-party override.
                filterOptionString == "~third-party" -> filterListEntryDataClass.thirdParty = FilterOptionDisposition.Override
            }
        }
    }

    /**
     * Strip the anchors and outer wildcards from [filterListEntryString] and store the remaining pieces in [filterListEntryDataClass].
     *
     * A leading `|` sets the initial match flag and a trailing `|` sets the final match flag.  One leading and one trailing `*` are then removed,
     * and the remainder is split on `*` into the applied entry list.
     *
     * @return the same [filterListEntryDataClass] instance that was passed in, after it has been updated.
     */
    private fun prepareFilterListString(filterListEntryString: String, filterListEntryDataClass: FilterListEntryDataClass): FilterListEntryDataClass {
        // Create a modified filter list entry string, as the variable passed in cannot be edited.
        var modifiedFilterListEntryString = filterListEntryString

        // Check if this is an initial match.
        if (modifiedFilterListEntryString.startsWith('|')) {
            // Strip the initial `|`.
            modifiedFilterListEntryString = modifiedFilterListEntryString.substring(1)

            // Set the initial match flag.
            filterListEntryDataClass.initialMatch = true
        }

        // Check if this is a final match.
        if (modifiedFilterListEntryString.endsWith('|')) {
            // Strip the final `|`.
            modifiedFilterListEntryString = modifiedFilterListEntryString.dropLast(1)

            // Set the final match flag.
            filterListEntryDataClass.finalMatch = true
        }

        // Remove the initial and then the final asterisk if they exist.
        modifiedFilterListEntryString = modifiedFilterListEntryString.removePrefix("*").removeSuffix("*")

        // Split the filter list entry string and set it as the applied entry list.
        filterListEntryDataClass.appliedEntryList = modifiedFilterListEntryString.split('*')

        // Store the size of the applied entry list.
        filterListEntryDataClass.sizeOfAppliedEntryList = filterListEntryDataClass.appliedEntryList.size

        // Determine if this is a single applied entry (including an empty entry, because splitting an empty string returns a list containing one empty string).
        filterListEntryDataClass.singleAppliedEntry = (filterListEntryDataClass.sizeOfAppliedEntryList == 1)

        // Return the filter list entry data class.
        return filterListEntryDataClass
    }

    private companion object {
        // The comment prefixes that carry filter list metadata.
        const val TITLE_PREFIX = "! Title:"
        const val VERSION_PREFIX = "! Version:"

        // The markers of content filtering entries, which are not implemented.
        val CONTENT_FILTERING_MARKERS = listOf("##", "#?#", "#@#", "#$#")
    }
}