Skip to content

Commit ce3d25e

Browse files
committed
modified documentation
1 parent 0f2db81 commit ce3d25e

7 files changed

+27
-11
lines changed

README.md

+9-3
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,14 @@
33
Feed Visualizer is a tool that can cluster RSS/Atom feed items based on semantic similarity and generate an interactive visualization.
44
This tool can be used to generate a 'semantic summary' of any website by reading its RSS/Atom feed. Shown below is an image of what the visualization generated by Feed Visualizer looks like. If you like this tool, please consider giving it a ⭐ on GitHub!
55

6-
![](sample_visualization3.gif)
6+
![](output.gif)
77

88

99
Interactive Demos:
10+
11+
* Visualization created from [NASA’s RSS Feed](https://www.nasa.gov/rss/dyn/breaking_news.rss) :
12+
https://ashishware.com/static/nasa_viz.html
13+
1014
* Visualization created from [Martin Fowler's Atom Feed](https://martinfowler.com/feed.atom) :
1115
[https://ashishware.com/static/martin_fowler_viz.html](https://ashishware.com/static/martin_fowler_viz.html)
1216

@@ -49,10 +53,11 @@ Now, we need to create a config file for Feed Visualizer. The config file contai
4953
"input_directory": "nasa",
5054
"output_directory": "nasa_output",
5155
"pretrained_model": "all-mpnet-base-v2",
52-
"clust_dist_threshold": 4,
56+
"clust_dist_threshold":1,
5357
"tsne_iter": 8000,
5458
"text_max_length": 2048,
55-
"topic_str_min_df": 0.25
59+
"random_state": 45,
60+
"topic_str_min_df": 0.20
5661
}
5762
```
5863

@@ -79,6 +84,7 @@ Here is some information on what each config setting does:
7984
"clust_dist_threshold": "Integer representing maximum radius of cluster. There is no correct value here. Experiment !",
8085
"tsne_iter": "Integer representing number of iterations for TSNE (higher is better)",
8186
"text_max_length": "Integer representing number of characters to read from content/description for semantic encoding.",
87+
"random_state": "An integer that serves as the random seed while generating the visualization. Use the same random_state to get reproducible results for a given set of data.",
8288
"topic_str_min_df": "A float. For example value of 0.25 means that only phrases which are present in 25% or more items in a cluster will be considered for being used as name of the cluster."
8389
}
8490
```

config.json

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"input_directory": "nasa",
3-
"output_directory": "feeds_output",
3+
"output_directory": "nasa_output",
44
"pretrained_model": "all-mpnet-base-v2",
5-
"clust_dist_threshold":0.5,
5+
"clust_dist_threshold":1,
66
"tsne_iter": 8000,
7-
"text_max_length": 8048,
7+
"text_max_length": 2048,
88
"random_state": 45,
9-
"topic_str_min_df": 0.25
10-
}
9+
"topic_str_min_df": 0.20
10+
}

nasa_visualization.png

3.15 KB
Loading

output.gif

912 KB
Loading

sample_visualization.gif

-456 KB
Binary file not shown.

sample_visualization2.gif

-1.45 MB
Binary file not shown.

visualization.html

+13-3
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,18 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
13
<head>
4+
<title>Feed Visualizer</title>
25
<!-- Load plotly.js into the DOM -->
36
<script src="https://d3js.org/d3.v7.min.js"></script>
47
<script src='https://cdn.plot.ly/plotly-2.11.1.min.js'></script>
58
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet"
69
integrity="sha384-1BmE4kWBq78iYhFldvKuhfTAU6auU8tT94WrHftjDbrCEXSU1oBoqyl2QvZ6jIW3" crossorigin="anonymous">
710
<meta name="viewport" content="width=device-width, initial-scale=1">
11+
<style>
12+
#clusters span {
13+
cursor: pointer;
14+
}
15+
</style>
816

917
</head>
1018

@@ -59,7 +67,7 @@
5967
csv_data = d
6068
let clusterNumbers = []
6169
let topics = {}
62-
d.forEach(a => {topics[a.cluster] = a.topic;clusterNumbers.push(parseInt(a.cluster))})
70+
d.forEach(a => { topics[a.cluster] = a.topic; clusterNumbers.push(parseInt(a.cluster)) })
6371
cluster_count = Math.max(...clusterNumbers) + 1
6472
d3.select('#clusters')
6573
.selectAll('span')
@@ -71,7 +79,7 @@
7179
.style("border", "1px solid grey")
7280
.style("min-width", "25px")
7381
.style("display", "inline-block")
74-
.style("color", function (d) { return (d < (cluster_count*.3) || d > (cluster_count*.7))? 'white':'black'})
82+
.style("color", function (d) { return (d < (cluster_count * .3) || d > (cluster_count * .7)) ? 'white' : 'black' })
7583
//.style("text-shadow", "1px 1px grey")
7684
.style("margin", "1px")
7785
.style("border-radius", "2px")
@@ -122,4 +130,6 @@
122130
}
123131
makeplot()
124132
</script>
125-
</body>
133+
</body>
134+
135+
</html>

0 commit comments

Comments
 (0)