from collections import Counter

import matplotlib.pyplot as plt
import nltk
import requests
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Download the NLTK resources used below if you haven't already:
# 'punkt' / 'punkt_tab' are requested for word_tokenize, 'stopwords' for the
# English stop-word list.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('punkt_tab')


def visualize_common_words(url, top_n=30, timeout=10):
    """Fetch a text document and plot its most common non-stop-words.

    The text is lower-cased and tokenized with NLTK; tokens that are not
    purely alphabetic or that appear in NLTK's English stop-word list are
    dropped. The ``top_n`` most frequent remaining words are shown as a
    horizontal bar chart, most frequent at the top.

    Args:
        url: Address of the text document to download.
        top_n: Maximum number of words to display (default 30).
        timeout: Seconds to wait for the server before giving up
            (default 10); passed straight to ``requests.get``.

    Raises:
        ValueError: If ``top_n`` is less than 1, or if no words remain
            after filtering (e.g. an empty document).
        requests.RequestException: If the download fails, times out, or
            the server answers with an HTTP error status.
    """
    if top_n < 1:
        raise ValueError(f"top_n must be at least 1, got {top_n}")

    # Without a timeout requests can block forever on an unresponsive host.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of plotting the words of an error page.
    response.raise_for_status()
    text = response.text

    # Tokenize words and remove stop words; a set keeps membership tests O(1).
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(text.lower())
    filtered_words = [
        token for token in tokens
        if token.isalpha() and token not in stop_words
    ]

    # Count the most common words.
    common_words = Counter(filtered_words).most_common(top_n)
    if not common_words:
        # zip(*[]) yields nothing, so the unpacking below would otherwise
        # fail with an opaque "not enough values to unpack" error.
        raise ValueError(f"No plottable words found in the document at {url!r}")

    # Split into labels and counts for plotting.
    labels, counts = zip(*common_words)

    plt.figure(figsize=(10, 5))
    plt.barh(labels, counts, color='skyblue')
    plt.xlabel('Frequency')
    plt.title('Most Common Words (Excluding Stop Words)')
    plt.gca().invert_yaxis()  # Put the most frequent word at the top
    plt.show()


# Example usage
if __name__ == "__main__":
    url = 'https://ximarketing.github.io/data/LDAshort.txt'
    visualize_common_words(url)