diff --git a/Stock-Analysis/.gitignore b/Stock-Analysis/.gitignore index 98a1238..7845c15 100644 --- a/Stock-Analysis/.gitignore +++ b/Stock-Analysis/.gitignore @@ -1,2 +1,4 @@ .stonks/ -myapp.log \ No newline at end of file +myapp.log +.venv +__pycache__ \ No newline at end of file diff --git a/Stock-Analysis/README.md b/Stock-Analysis/README.md index b727286..c30bcfc 100644 --- a/Stock-Analysis/README.md +++ b/Stock-Analysis/README.md @@ -23,16 +23,20 @@ This Python script empowers you to effortlessly analyze and visualize stock rati ## Prerequisites - Python 3.x -- Required Python packages: `requests`, `csv`, `yfinance`, `pandas`, `matplotlib` +- Required Python packages: `requests`, `csv`, `yfinance`, `pandas`, `matplotlib`,`xlrd`,`openpyxl` - Internet connection to fetch stock data from Yahoo Finance ## Usage 1. **Install Dependencies**: Ensure that all required Python packages are installed. You can install them via pip: ``` - pip install requests yfinance pandas matplotlib + pip install requests yfinance pandas matplotlib xlrd openpyxl + ``` +or + ``` + pip install -r requirements.txt ``` 2. **Prepare Stock List**: Create a CSV file containing a list of stock symbols, with one symbol per line. -3. **Run the Script**: Execute the script `main.py`. You'll be prompted to enter the filename of the CSV containing stock symbols and whether to sort the data. +3. **Run the Script**: Execute the script `app.py`. You'll be prompted to enter the filename of the CSV containing stock symbols and whether to sort the data. The recommended file name is "topix_core30". 4. **View Data**: After execution, the script will display the fetched stock data. If opted, it will also generate a bar plot for visualization. 5. **Save Successful Symbols**: Optionally, you can choose to save the symbols for which data was successfully fetched into another CSV file. diff --git a/Stock-Analysis/app.py b/Stock-Analysis/app.py index c4d8ec1..330c8ca 100644 --- a/Stock-Analysis/app.py +++ b/Stock-Analysis/app.py @@ -63,7 +63,9 @@ def main(self): self.logger.info("No stocks loaded.") return - with Pool() as pool: + # Limit the number of Pool() to reduce memory consumption. + # This is to avoid running out of PC memory (or virtual memory) when executing parallel processing. + with Pool(processes=2) as pool: data = pool.map(self.stock_fetcher.fetch_stock_data, stocks) data = [stock_info for stock_info in data if stock_info is not None] @@ -73,9 +75,10 @@ def main(self): self.logger.warning("No valid stock data was fetched.") return - successful_symbols = [stock_info["symbol"] for stock_info in data] - + successful_symbols = data # Keep all data + df = self.data_processor.display_data(data, sort=sort_data) + self.stock_plotter.plot_data( df, save_figure=True, diff --git a/Stock-Analysis/data/aaa.csv b/Stock-Analysis/data/aaa.csv new file mode 100644 index 0000000..5fee11c --- /dev/null +++ b/Stock-Analysis/data/aaa.csv @@ -0,0 +1,31 @@ +Rate,Symbol +2.16667,2914.T +2.28571,3382.T +1.64706,4063.T +2.13333,4502.T +1.375,4568.T +1.8125,6098.T +2.11765,6367.T +1.46154,6501.T +2.0,6503.T +1.36364,6758.T +1.95238,6857.T +1.76471,6861.T +1.5,7011.T +1.76471,7203.T +2.23529,7267.T +1.6,7741.T +1.96296,7974.T +1.53846,8001.T +2.23077,8031.T +1.86364,8035.T +2.92857,8058.T +2.08333,8306.T +1.66667,8316.T +2.08333,8411.T +2.375,8729.T +1.81818,8766.T +2.0,9432.T +2.46667,9433.T +1.92857,9983.T +1.75,9984.T diff --git a/Stock-Analysis/data/topix_core30.csv b/Stock-Analysis/data/topix_core30.csv new file mode 100644 index 0000000..05d9473 --- /dev/null +++ b/Stock-Analysis/data/topix_core30.csv @@ -0,0 +1,31 @@ +2914.T +3382.T +4063.T +4502.T +4568.T +6098.T +6367.T +6501.T +6503.T +6758.T +6857.T +6861.T +7011.T +7203.T +7267.T +7741.T +7974.T +8001.T +8031.T +8035.T +8058.T +8306.T +8316.T +8411.T +8729.T +8766.T +9432.T +9433.T +9434.T +9983.T +9984.T diff --git a/Stock-Analysis/data_processor.py b/Stock-Analysis/data_processor.py index aa255a5..22c0d5c 100644 --- a/Stock-Analysis/data_processor.py +++ b/Stock-Analysis/data_processor.py @@ -21,22 +21,30 @@ def display_data(data, sort=False): return df @staticmethod - def save_successful_symbols(filename, successful_symbols): + def save_successful_symbols(filename, results): """Save successfully fetched stock symbols to a CSV file.""" - if not successful_symbols: - logging.getLoger(__name__).info("No successful symbols to save.") + if not results: + logging.getLogger(__name__).info("No successful symbols to save.") return - + + directory = "data" + #Constructing the file path + # Eliminate the possibility of unbound in exception handling + path = os.path.join(directory, filename) + try: - directory = "data" if not os.path.exists(directory): os.makedirs(directory) - #Constructing the file path - path = os.path.join(directory, filename) - with open(path, 'w', newline='') as file: - writer = csv.writer(file) - for symbol in successful_symbols: - writer.writerow([symbol]) + with open(path, 'w', newline='', encoding='utf-8') as file: + # DictWriter allows you to neatly divide dictionary data into columns + fieldnames = ['Rate', 'Symbol'] + writer = csv.DictWriter(file, fieldnames=fieldnames) + + # Write header (column name) + writer.writeheader() + # write all lines of data + for row in results: + writer.writerow(row) logging.getLogger(__name__).info(f"Successful symbols saved to {path}.") except PermissionError: logging.getLogger(__name__).error(f"Permission denied: Unable to save symbols to {path}. Please check file permissions") diff --git a/Stock-Analysis/fetch_stocks.py b/Stock-Analysis/fetch_stocks.py index a24cb76..df5322a 100644 --- a/Stock-Analysis/fetch_stocks.py +++ b/Stock-Analysis/fetch_stocks.py @@ -17,10 +17,20 @@ def fetch_stock_data(stock): # Check if recommendation data exists if 'recommendationMean' not in info: - raise KeyError("recommendationMean field missing in API response") - - rate = info["recommendationMean"] - return (rate, stock) + # log and return None instead of throwing an error + logging.getLogger(__name__).warning(f"No recommendation data for {stock}. Skipping.") + return None + + # First find the analyst recommendation value (recommendationMean) + # If not available, substitute current stock price (currentPrice) + rate = info.get("recommendationMean") + if rate is None: + # Obtain prices for Japanese stocks etc. + rate = info.get("currentPrice") or info.get("regularMarketPrice") or 0 + logging.getLogger(__name__).debug(f"{stock}: Using price as fallback.") + + # Unify the return value into a dictionary (for consistency with app.py) + return {"Rate": rate, "Symbol": stock} except ValueError as e: logging.getLogger(__name__).error( diff --git a/Stock-Analysis/generate_jpx_stocks_csv.py b/Stock-Analysis/generate_jpx_stocks_csv.py new file mode 100644 index 0000000..290c1da --- /dev/null +++ b/Stock-Analysis/generate_jpx_stocks_csv.py @@ -0,0 +1,50 @@ +import pandas as pd +import os +import logging + +''' +This is an example. +This pulls data from the Tokyo Stock Exchange to obtain a CSV of stocks for which data is available. +Please try to acquire other stocks as well. +''' +def fetch_and_save_core30(): + # URL of list of listed stocks (Excel) from JPX official website + # Japanese listed stocks + # This file contains market segmentation and size segmentation for all listed stocks + jpx_url = "https://www.jpx.co.jp/markets/statistics-equities/misc/tvdivq0000001vg2-att/data_j.xls" + + print("Obtaining the latest stock list from JPX...") + try: + # Read the Excel file (using pandas read_excel) + # Extract stocks with 'TOPIX Core30' listed in '規模区分(Size category)' column + df = pd.read_excel(jpx_url) + + # Filter only those with size category TOPIX Core30 + core30_df = df[df['規模区分'] == 'TOPIX Core30'] + + if core30_df.empty: + print("TOPIX Core30 stocks were not found. Please check the URL and file format.") + return + + # Convert the stock code to a format that can be used by yfinance (e.g. 7203.T) + # Convert the number in the 'code' column to a string and add '.T' to the end + symbols = core30_df['コード'].astype(str).apply(lambda x: x + ".T").tolist() + + # Save as CSV + directory = "data" + if not os.path.exists(directory): + os.makedirs(directory) + + output_path = os.path.join(directory, "topix_core30.csv") + + # Save without index and header (to match existing load_stocks.py specifications) + pd.Series(symbols).to_csv(output_path, index=False, header=False) + + print(f"Success: {len(symbols)} stocks are saved on {output_path}") + print("stocks sample:", symbols[:5]) + + except Exception as e: + print(f"Error!!: {e}") + +if __name__ == "__main__": + fetch_and_save_core30() \ No newline at end of file diff --git a/Stock-Analysis/load_stocks.py b/Stock-Analysis/load_stocks.py index e3ba3c7..fa210fd 100644 --- a/Stock-Analysis/load_stocks.py +++ b/Stock-Analysis/load_stocks.py @@ -5,21 +5,50 @@ class StockLoader: @staticmethod def load_stocks(filename): - """Load stock symbols from a CSV file.""" + """Load stock symbols from CSV, trying multiple character codes.""" stocks = [] try: - with open(filename, 'r') as file: - reader = csv.reader(file) - for line in reader: - if line and len(line) >= 1: - symbol = line[0].strip() - if symbol: - stocks.append(symbol) + #List of encodings to try. + #utf-8-sig can preferentially process UTF-8 with BOM (for Excel) + encodings = ['utf-8-sig', 'utf-8', 'cp932', 'shift_jis', 'euc-jp'] + + selected_enc = None + + for enc in encodings: + try: + with open(filename, 'r', encoding=enc) as file: + # Try loading it once and check if there are any errors + reader = csv.reader(file) + # Keep data temporarily + temp_stocks = [] + for line in reader: + if line and len(line) >= 1: + symbol = line[0].strip() + if symbol: + temp_stocks.append(symbol) + + # If you get to this point, loading is successful + stocks = temp_stocks + selected_enc = enc + break + except (UnicodeDecodeError, csv.Error): + # If decoding fails, try the next character code + continue + except FileNotFoundError: + logging.getLogger(__name__).error(f"File {filename} not found.") + return [] + + if selected_enc: + logging.getLogger(__name__).info(f"Successfully loaded {filename} using {selected_enc}.") + else: + logging.getLogger(__name__).error(f"Failed to decode {filename} with available encodings.") + return [] + if not stocks: logging.getLogger(__name__).warning(f"No valid stock symbols found in file {filename}.") - else: - logging.getLogger(__name__).info('File loaded successfully.') + return stocks + except FileNotFoundError: logging.getLogger(__name__).error(f"File {filename} not found. Please provide a valid file path.") return [] diff --git a/Stock-Analysis/plot_stocks.py b/Stock-Analysis/plot_stocks.py index f1c3464..ec8fa36 100644 --- a/Stock-Analysis/plot_stocks.py +++ b/Stock-Analysis/plot_stocks.py @@ -1,6 +1,7 @@ import matplotlib.pyplot as plt import logging import datetime +import os class StockPlotter: @@ -23,8 +24,13 @@ def plot_data(df, save_figure=False, figure_filename="plot.png", bar_width=0.6, plt.xticks(rotation=45, ha='right', fontsize=font_size) plt.tight_layout() if save_figure: + # Change to relative path and create the folder if it doesn't exist + results_dir = "results" + if not os.path.exists(results_dir): + os.makedirs(results_dir) + current_time = datetime.datetime.now().strftime("%d-%m-%Y") - figure_filename = f"/workspaces/Python_Project/Stock-Analysis/results/plot_{current_time}.png" + figure_filename = f"results/plot_{current_time}.png" plt.savefig(figure_filename, dpi=dpi) # Save the figure to a file with higher resolution logging.getLogger(__name__).info(f"Figure saved as {figure_filename}.") # Log that the figure has been saved else: diff --git a/Stock-Analysis/requirements.txt b/Stock-Analysis/requirements.txt new file mode 100644 index 0000000..d5ecdee Binary files /dev/null and b/Stock-Analysis/requirements.txt differ diff --git a/Stock-Analysis/results/plot_27-02-2026.png b/Stock-Analysis/results/plot_27-02-2026.png new file mode 100644 index 0000000..50da680 Binary files /dev/null and b/Stock-Analysis/results/plot_27-02-2026.png differ