Add scripts

This commit is contained in:
Anthony Wang 2021-12-07 18:23:20 -06:00
parent b86221c202
commit b3fb79815e
Signed by: a
GPG key ID: BC96B00AEC5F2D76
4 changed files with 106 additions and 0 deletions

3
.gitignore vendored
View file

@ -1 +1,4 @@
.ipynb_checkpoints
data
*.html
out

1
curl Normal file
View file

@ -0,0 +1 @@
curl 'https://revenue.stlouisco.com/ias/AsmtInfo.aspx?Locator={0}' -s -X POST -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:94.0) Gecko/20100101 Firefox/94.0' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'Referer: https://revenue.stlouisco.com/ias/SearchResults.aspx' -H 'Content-Type: application/x-www-form-urlencoded' -H 'Origin: https://revenue.stlouisco.com' -H 'DNT: 1' -H 'Connection: keep-alive' -H 'Cookie: ASP.NET_SessionId=t3j1it55jmzo1qbo2p1mxfzr' -H 'Upgrade-Insecure-Requests: 1' -H 'Sec-Fetch-Dest: document' -H 'Sec-Fetch-Mode: navigate' -H 'Sec-Fetch-Site: same-origin' --data-raw '__LASTFOCUS=&__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE=%2FwEPDwUJNzk3NzM0MjU5D2QWAmYPZBYCAgMPZBYCAgEPZBYCAgEPZBYCAgUPDxYCHgdWaXNpYmxlZ2QWCgIBDw8WAh4EVGV4dAUSMTE3MSBSZWNvcmRzIEZvdW5kZGQCAw8PFgIfAQULUGFnZSAxIG9mIDZkZAIFDw8WAh8AZ2QWLAIBDw8WAh8BBQVGaXJzdBYCHgdvbmNsaWNrBRdTZXRDdXJzb3JTdHlsZSgnd2FpdCcpO2QCBQ8PFgIfAQUPPGI%2BUHJldmlvdXM8L2I%2BFgIfAgUXU2V0Q3Vyc29yU3R5bGUoJ3dhaXQnKTtkAgkPDxYEHwEFDSZuYnNwOyZuYnNwOzEfAGdkZAILDw8WAh8BBQExFgIfAgUXU2V0Q3Vyc29yU3R5bGUoJ3dhaXQnKTtkAg0PDxYEHwEFBiZuYnNwOx8AZ2RkAg8PDxYEHwEFATIfAGcWAh8CBRdTZXRDdXJzb3JTdHlsZSgnd2FpdCcpO2QCEQ8PFgQfAQUGJm5ic3A7HwBnZGQCEw8PFgQfAQUBMx8AZxYCHwIFF1NldEN1cnNvclN0eWxlKCd3YWl0Jyk7ZAIVDw8WBB8BBQYmbmJzcDsfAGdkZAIXDw8WBB8BBQE0HwBnFgIfAgUXU2V0Q3Vyc29yU3R5bGUoJ3dhaXQnKTtkAhkPDxYEHwEFBiZuYnNwOx8AZ2RkAhsPDxYEHwEFATUfAGcWAh8CBRdTZXRDdXJzb3JTdHlsZSgnd2FpdCcpO2QCHQ8PFgQfAQUGJm5ic3A7HwBnZGQCHw8PFgQfAQUBNh8AZxYCHwIFF1NldEN1cnNvclN0eWxlKCd3YWl0Jyk7ZAIjDw8WAh8BBQE3FgIfAgUXU2V0Q3Vyc29yU3R5bGUoJ3dhaXQnKTtkAicPDxYCHwEFATgWAh8CBRdTZXRDdXJzb3JTdHlsZSgnd2FpdCcpO2QCKw8PFgIfAQUBORYCHwIFF1NldEN1cnNvclN0eWxlKCd3YWl0Jyk7ZAIvDw8WAh8BBQIxMBYCHwIFF1NldEN1cnNvclN0eWxlKCd3YWl0Jyk7ZAIxDw8WAh8AZ2RkAjMPDxYEHwEFCzxiPk5leHQ8L2I%2BHwBnFgIfAgUXU2V0Q3Vyc29yU3R5bGUoJ3dhaXQnKTtkAjUPDxYCHwBnZGQCNw8PFgIfAGdkZAIJDw8WAh8AZ2QWLAIBDw8WAh8BBQVGaXJzdBYCHwIFF1Nl
dEN1cnNvclN0eWxlKCd3YWl0Jyk7ZAIFDw8WAh8BBQ88Yj5QcmV2aW91czwvYj4WAh8CBRdTZXRDdXJzb3JTdHlsZSgnd2FpdCcpO2QCCQ8PFgQfAQUNJm5ic3A7Jm5ic3A7MR8AZ2RkAgsPDxYCHwEFATEWAh8CBRdTZXRDdXJzb3JTdHlsZSgnd2FpdCcpO2QCDQ8PFgQfAQUGJm5ic3A7HwBnZGQCDw8PFgQfAQUBMh8AZxYCHwIFF1NldEN1cnNvclN0eWxlKCd3YWl0Jyk7ZAIRDw8WBB8BBQYmbmJzcDsfAGdkZAITDw8WBB8BBQEzHwBnFgIfAgUXU2V0Q3Vyc29yU3R5bGUoJ3dhaXQnKTtkAhUPDxYEHwEFBiZuYnNwOx8AZ2RkAhcPDxYEHwEFATQfAGcWAh8CBRdTZXRDdXJzb3JTdHlsZSgnd2FpdCcpO2QCGQ8PFgQfAQUGJm5ic3A7HwBnZGQCGw8PFgQfAQUBNR8AZxYCHwIFF1NldEN1cnNvclN0eWxlKCd3YWl0Jyk7ZAIdDw8WBB8BBQYmbmJzcDsfAGdkZAIfDw8WBB8BBQE2HwBnFgIfAgUXU2V0Q3Vyc29yU3R5bGUoJ3dhaXQnKTtkAiMPDxYCHwEFATcWAh8CBRdTZXRDdXJzb3JTdHlsZSgnd2FpdCcpO2QCJw8PFgIfAQUBOBYCHwIFF1NldEN1cnNvclN0eWxlKCd3YWl0Jyk7ZAIrDw8WAh8BBQE5FgIfAgUXU2V0Q3Vyc29yU3R5bGUoJ3dhaXQnKTtkAi8PDxYCHwEFAjEwFgIfAgUXU2V0Q3Vyc29yU3R5bGUoJ3dhaXQnKTtkAjEPDxYCHwBnZGQCMw8PFgQfAQUUDQoJCQkJCQkJPGI%2BTmV4dDwvYj4fAGcWAh8CBRdTZXRDdXJzb3JTdHlsZSgnd2FpdCcpO2QCNQ8PFgIfAGdkZAI3Dw8WBB8BBQRMYXN0HwBnFgIfAgUXU2V0Q3Vyc29yU3R5bGUoJ3dhaXQnKTtkAgsPDxYCHwEF1QI8dWwgc3R5bGU9J2NvbG9yOmdyZWVuOyc%2BPGxpPjxzcGFuIHN0eWxlPSdjb2xvcjpibGFjazsnPkZvciBtb3JlIGluZm9ybWF0aW9uLCBjbGljayBvbiBhbnkgcGFydCBvZiBhIGhpZ2hsaWdodGVkIHJvdy48L3NwYW4%2BPC9saT48bGk%2BPHNwYW4gc3R5bGU9J2NvbG9yOmJsYWNrOyc%2BVG8gdmlldyBhIHBhcmNlbCB2aWEgdGhlIGludGVyYWN0aXZlIG1hcHMsIGNsaWNrIG9uIHRoZSBzZWNvbmQgY29sdW1uLjwvc3Bhbj48L2xpPjxsaT48c3BhbiBzdHlsZT0nY29sb3I6YmxhY2s7Jz5UbyBjaGFuZ2UgdGhlIHNvcnQgb3JkZXIsIGNsaWNrIG9uIGEgY29sdW1uIGhlYWRlci48L3NwYW4%2BPC9saT48L3VsPmRkZKM%2BHPZWOqCKRLqh3Ug019vc%2Fsbz&__VIEWSTATEGENERATOR=27CECC6A&__PREVIOUSPAGE=CrA6B5EZ75bgkgvtRFAwN7nm9H9wBKkxCwvQbtJuUb7ZgpzAZbz8FXJdYb3ZEx61enmA6qMQgNWLj23wZ8FKvXuZbdk1&__EVENTVALIDATION=%2FwEWJAKKz4HMCgLB7%2FuoCwKA46y0DQKA46i0DQKA48S0DQKA48C0DQKA47y0DQL14oTlCALIkJmnBwL%2FwKbOAQKthLLmBQLJmOGeAQLq3aScAgLq3aCcAgLq3bycAgLq3bicAgLq3bScAgLf3fycCgLe4pP5AQKh0pWZAQL1svPiAQLwnLSLCAL07aSNCgLMh7TVDwKu3%2F2vDALO99PBAQLWo9ZPAv3PysYOAvfyyt4MAs%2FO4aYBAsGMtL0DAouX1pcJAoL8%2FPMNAvmz7b0JArjfpNoBAveaoo8
BRq35gnkhmy9QjduivDhu0IER56k%3D&ctl00%24MainContent%24hidSearchType=OWNER&ctl00%24MainContent%24hidLocator=&ctl00%24MainContent%24hidFirstName=&ctl00%24MainContent%24hidLastName=moore&ctl00%24MainContent%24hidAddrNumber=&ctl00%24MainContent%24hidAddrDirection=&ctl00%24MainContent%24hidAddrStreet=&ctl00%24MainContent%24hidAddrSuffix=&ctl00%24MainContent%24hidSubdivision=&ctl00%24MainContent%24hidOrderBy=OWNER&ctl00%24MainContent%24hidIsAsc=1&ctl00%24MainContent%24hidPageNum=1&ctl00%24MainContent%24hidTotalPages=6&ctl00%24MainContent%24hidLowerPageNum=1&ctl00%24MainContent%24hidUpperPageNum=6&ctl00%24MainContent%24hidIsNewSearch=False&hidLocatorNum={0}&hidRefreshDate=Monday%2C+December+06%2C+2021&hidShowDataCase=UpperCase&ctl00%24MainContent%24hidESI=%25c3%2598%25c3%25b7%252f%25c5%2592%2512&hidELN={1}&hidINL=true'

85
main.ipynb Normal file
View file

@ -0,0 +1,85 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "4278b2ab",
"metadata": {},
"outputs": [],
"source": [
"import os"
]
},
{
 "cell_type": "code",
 "execution_count": null,
 "id": "a7940b73",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Load the curl command template that is filled in once per parcel.\n",
  "with open('curl', 'r') as f:\n",
  "    # Strip trailing newlines only. Slicing off the last character would\n",
  "    # remove the command's closing quote if the file had no final newline.\n",
  "    curl = f.read().rstrip('\\n')"
 ]
},
{
 "cell_type": "code",
 "execution_count": null,
 "id": "f3a91d27",
 "metadata": {},
 "outputs": [],
 "source": [
  "def test(locator, eln):\n",
  "    \"\"\"Return True when the page fetched for this parcel contains LADUE.\n",
  "\n",
  "    locator and eln are scraped out of saved HTML, so they are never\n",
  "    handed to a shell: the curl template is split into arguments first\n",
  "    and only then filled in, which keeps each value inside the single\n",
  "    argument it was written for.\n",
  "    \"\"\"\n",
  "    import shlex\n",
  "    import subprocess\n",
  "\n",
  "    # Same escaping as before: a literal % in eln is sent as %25.\n",
  "    escaped = eln.replace('%', '%25')\n",
  "    argv = [part.format(locator, escaped) for part in shlex.split(curl)]\n",
  "    page = subprocess.run(argv, stdout=subprocess.PIPE).stdout\n",
  "    # Same outcome as the previous `| grep LADUE` exit-status test.\n",
  "    return b'LADUE' in page"
 ]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c27d1a2f",
"metadata": {},
"outputs": [],
"source": [
"for i in range(0, 6):\n",
" with open(str(i) + '.html') as f:\n",
" r = f.read()\n",
" pos = 0\n",
" cnt = 0\n",
" while (x := r.find('return ShowParcelData(', pos)) != -1:\n",
" pos = x + 1\n",
" cnt += 1\n",
" if cnt % 3 != 0:\n",
" continue\n",
" m = r.find(' ', x + 23)\n",
" locator = r[x + 23:m - 2]\n",
" eln = r[m + 2:r.find(')', x) - 1]\n",
" if test(locator, eln):\n",
" name = r[r.find(')', x) + 4:r.find('<', x)]\n",
" print(locator, eln, name)"
]
}
],
"metadata": {
"interpreter": {
"hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
},
"kernelspec": {
"display_name": "C++11",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

17
main.py Normal file
View file

@ -0,0 +1,17 @@
"""Geocode the addresses listed in ``data`` and collect the Nominatim matches."""
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

geolocator = Nominatim(user_agent='This is a test')
# Space the lookups at least one second apart. The limiter also retries
# geocoder service errors and returns None once it gives up, so a failed
# lookup lands in the 'Error!' branch below instead of aborting the run.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

ans = []
with open('data', 'r') as f:
    for line in f:
        # Drop the line terminator so it cannot leak into the address when
        # the address happens to be the last column.
        fields = line.rstrip('\n').split('\t')
        if len(fields) < 4:
            # Blank or truncated row: there is no fourth column to look up.
            print('Skipping malformed line:', repr(line))
            continue
        address = fields[3] + ' St Louis, MO'
        location = geocode(address)
        print(location)
        if location is None:
            print('Error!')
        else:
            print('Ladue' in location.address or 'Creve Coeur' in location.address)
            # Store the Location itself; `ans += location` would iterate it
            # and splice its individual items into the list instead.
            ans.append(location)
print(ans)