Skip to content

Commit 254279c

Browse files
author
hgouchet
committed
First commit
1 parent 5069e7f commit 254279c

File tree

7 files changed

+8713
-0
lines changed

7 files changed

+8713
-0
lines changed

Dockerfile

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Image used to prepare the Public Suffix List and to run the MySQL client
# that installs the SQL functions.
FROM bash:4.4

RUN mkdir -p /opt/public-suffix
WORKDIR /opt/public-suffix

# COPY is the idiomatic instruction for plain local files; no remote fetch
# or archive extraction is wanted here.
COPY . /opt/public-suffix

RUN apk add --no-cache curl sed mysql-client

# Downloads the list, then drops the comment lines (starting with //) and the
# blank lines. The download and the filtering are chained with && instead of
# a pipe so that a failed download (-f: non-zero exit on HTTP errors) aborts
# the build instead of silently producing an empty registry.dat.
RUN curl -fsSL "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat" -o /tmp/public_suffix_list.dat \
    && sed -e '/^\/\//d' -e '/^\s*$/d' /tmp/public_suffix_list.dat > registry.dat \
    && rm /tmp/public_suffix_list.dat

README.md

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Public Suffix for MySQL
2+
3+
MySQL functions that parse domain names based on the Public Suffix List.
4+
5+
6+
## Installation
7+
8+
```bash
9+
$ git clone https://github.com/rvflash/publicsuffix-sql
10+
```
11+
12+
13+
## Testing
14+
15+
Go into the source directory and start the Docker containers.
16+
17+
```bash
18+
$ cd publicsuffix-sql
19+
$ docker-compose up
20+
$ docker exec -ti $(docker ps -lq) bash
21+
```
22+
23+
In the new prompt, open the MySQL client:
24+
25+
```bash
26+
$ mysql -h db -u root -ps3cr3tp4ssw0rd ps
27+
```
28+
29+
30+
## Features
31+
32+
* The function named `hostname` extracts the hostname from a URL.
33+
* The function named `domain` returns the hostname without the subdomain (ex: google.com.au).
34+
* The `sld` and `tld` functions return, respectively, the second-level domain (ex: com.au) and the top-level domain (ex: au).
35+
36+
```
37+
> select hostname("https://www.google.com.au/search?q=golang");
38+
+-------------------------------------------------------+
39+
| hostname("https://www.google.com.au/search?q=golang") |
40+
+-------------------------------------------------------+
41+
| www.google.com.au |
42+
+-------------------------------------------------------+
43+
1 row in set (0.00 sec)
44+
45+
> select domain("https://www.google.com.au/");
46+
+--------------------------------------+
47+
| domain("https://www.google.com.au/") |
48+
+--------------------------------------+
49+
| google.com.au |
50+
+--------------------------------------+
51+
1 row in set (0.00 sec)
52+
53+
> select sld(domain("https://www.google.com.au/"));
54+
+-------------------------------------------+
55+
| sld(domain("https://www.google.com.au/")) |
56+
+-------------------------------------------+
57+
| com.au |
58+
+-------------------------------------------+
59+
1 row in set (0.00 sec)
60+
```

add-functions.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#!/usr/bin/env bash
#
# Feeds functions.sql to the `ps` database on the `db` host: it creates the
# public_suffix table, loads the list and installs the SQL functions.

# functions.sql is resolved next to this script, so the import does not
# depend on the caller's working directory.
# --local-infile=1 explicitly allows the LOAD DATA LOCAL INFILE statement
# that functions.sql runs, instead of relying on the client's default.
mysql --local-infile=1 -h db -u root -ps3cr3tp4ssw0rd ps < "$(dirname "${BASH_SOURCE[0]}")/functions.sql"

docker-compose.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
version: '3'
services:
  # Database server hosting the `ps` schema.
  db:
    image: mariadb
    environment:
      MYSQL_ROOT_PASSWORD: s3cr3tp4ssw0rd
      MYSQL_DATABASE: ps
    ports:
      # Quoted so that YAML always reads the HOST:CONTAINER mapping as a string.
      - "3306:3306"
  # Waits for the database port to answer, then installs the SQL functions.
  app:
    depends_on:
      - db
    build: .
    command: ["./wait-for-it.sh", "db:3306", "--", "./add-functions.sh"]
    volumes:
      - .:/opt/public-suffix
    links:
      - db

functions.sql

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
-- Creates the workspace: one row per entry of the public suffix list.
CREATE TABLE IF NOT EXISTS public_suffix (
    suffix varchar(255) not null,
    primary key (suffix)
) engine=InnoDB default charset=utf8 collate=utf8_general_ci;

-- Loads the public suffix list, one suffix per line.
-- IGNORE skips the rows whose suffix is already stored, so the script can be
-- replayed on an existing table.
-- CHARACTER SET is given explicitly because the list is published as UTF-8
-- and holds non-ASCII suffixes; without it the file is decoded with the
-- default character set of the database.
LOAD DATA LOCAL INFILE '/opt/public-suffix/registry.dat'
IGNORE INTO TABLE public_suffix
CHARACTER SET utf8
LINES TERMINATED BY '\n'
(suffix);

-- The function bodies below contain `;`, hence the alternative delimiter.
DELIMITER $$
13+
14+
-- Extracts the hostname, like `www.rv.net`
-- Sample: http://username:password@hostname:9090/path?arg=value#anchor
--
-- uri:     absolute URL, protocol-relative URL (`//host/path`) or bare
--          `host[:port][/path]` string.
-- returns: the host part only, or NULL when uri is NULL.
--
-- The path, query and fragment are removed BEFORE the user information and
-- the port, so that a `@`, `:` or `://` found in the path or in the query
-- string cannot be mistaken for a part of the authority.
-- IPv6 literals (`[::1]`) are not supported: they are cut at their first `:`.
DROP FUNCTION IF EXISTS HOSTNAME $$
CREATE FUNCTION HOSTNAME (uri VARCHAR(255) CHARSET utf8)
RETURNS VARCHAR(255) CHARSET utf8 DETERMINISTIC NO SQL
BEGIN
    DECLARE host VARCHAR(255) CHARSET utf8;
    SET host = uri;
    -- removes the scheme, only when the URI really starts with one
    IF host REGEXP '^[A-Za-z][A-Za-z0-9+.-]*://' THEN
        SET host = SUBSTRING(host, LOCATE('://', host) + 3);
    ELSEIF LEFT(host, 2) = '//' THEN
        -- protocol-relative URL
        SET host = SUBSTRING(host, 3);
    END IF;
    -- removes the path, the query string and the fragment
    SET host = SUBSTRING_INDEX(host, '/', 1);
    SET host = SUBSTRING_INDEX(host, '?', 1);
    SET host = SUBSTRING_INDEX(host, '#', 1);
    -- removes the user information (all chars before the last [at])
    SET host = SUBSTRING_INDEX(host, '@', -1);
    -- removes the port
    SET host = SUBSTRING_INDEX(host, ':', 1);
    RETURN host;
END $$
31+
32+
-- Returns only the second level domain, like `co.uk`
--
-- host:    hostname, as returned by HOSTNAME().
-- returns: the last two labels of host when they are listed in public_suffix,
--          NULL otherwise (including for a NULL or single-label host, which
--          cannot carry a two-label suffix).
DROP FUNCTION IF EXISTS SLD $$
CREATE FUNCTION SLD (host VARCHAR(255) CHARSET utf8)
RETURNS VARCHAR(255) CHARSET utf8 READS SQL DATA
BEGIN
    -- Same width as public_suffix.suffix, so a match always fits.
    DECLARE matched VARCHAR(255) CHARSET utf8;
    -- Without a dot, SUBSTRING_INDEX(host, '.', -2) would give back the whole
    -- host and a bare TLD such as `com` would be reported as a second level.
    IF host IS NULL OR LOCATE('.', host) = 0 THEN
        RETURN NULL;
    END IF;
    -- A scalar subquery yields NULL on a miss without the "no data" warning
    -- raised by SELECT ... INTO.
    SET matched = (
        SELECT suffix
        FROM public_suffix
        WHERE suffix = SUBSTRING_INDEX(host, '.', -2)
    );
    RETURN matched;
END $$
43+
44+
-- Returns only the top level domain, like `fr`
--
-- host:    hostname, as returned by HOSTNAME().
-- returns: the last label of host when it is listed in public_suffix,
--          NULL otherwise.
DROP FUNCTION IF EXISTS TLD $$
CREATE FUNCTION TLD (host VARCHAR(255) CHARSET utf8)
RETURNS VARCHAR(255) CHARSET utf8 READS SQL DATA
BEGIN
    -- Same width as public_suffix.suffix: a narrower type fails (strict mode)
    -- or truncates on top level domains longer than it, such as `museum`.
    DECLARE matched VARCHAR(255) CHARSET utf8;
    -- A scalar subquery yields NULL on a miss without the "no data" warning
    -- raised by SELECT ... INTO.
    SET matched = (
        SELECT suffix
        FROM public_suffix
        WHERE suffix = SUBSTRING_INDEX(host, '.', -1)
    );
    RETURN matched;
END $$
55+
56+
-- Returns the root domain like `rv.net` for `www.rv.net`
--
-- uri:     URL or hostname; its host part is taken with HOSTNAME().
-- returns: the last three labels of the host when its last two labels are a
--          known suffix (SLD), the last two labels when only its last label
--          is known (TLD), and the TLD() result itself (NULL or empty) when
--          the last label is unknown.
DROP FUNCTION IF EXISTS DOMAIN;
CREATE FUNCTION DOMAIN (uri VARCHAR(255) CHARSET utf8)
RETURNS VARCHAR(255) CHARSET utf8 READS SQL DATA
BEGIN
    DECLARE v_host VARCHAR(255) CHARSET utf8;
    DECLARE v_top VARCHAR(255) CHARSET utf8;

    SET v_host = HOSTNAME(uri);
    SET v_top = TLD(v_host);

    -- Unknown top level domain: nothing to build on.
    IF v_top IS NULL OR v_top = '' THEN
        RETURN v_top;
    END IF;

    -- Known two-label suffix: the registrable name is one label longer.
    IF SLD(v_host) != '' THEN
        RETURN SUBSTRING_INDEX(v_host, '.', -3);
    END IF;

    RETURN SUBSTRING_INDEX(v_host, '.', -2);
END $$
74+
75+
DELIMITER ;

0 commit comments

Comments
 (0)