In [9]:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Root URL of the remote directory listing that is crawled by scrape_and_download.
# Keep the trailing slash: urljoin resolves the relative links of the listing against it.
BASE_URL = "https://downloads.rijkswaterstaatdata.nl/jarkus_profielen/"

# Function to create a directory
def create_directory(path):
    """Create the directory ``path``, including missing parents, if needed.

    ``exist_ok=True`` makes the call safe to repeat and removes the
    check-then-create race of testing ``os.path.exists`` first.

    Args:
        path (str): Directory to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

# Function to download a file
def download_file(url, save_path, timeout=60):
    """Stream the resource at ``url`` into the local file ``save_path``.

    A response with a status other than 200 is reported with ``print`` and
    nothing is written. Exceptions raised by ``requests`` (connection
    errors, timeouts) propagate to the caller.

    Args:
        url (str): Address of the file to fetch.
        save_path (str): Local path the body is written to; an existing
            file is overwritten.
        timeout (float or tuple): Seconds handed to ``requests.get``.
            Without a timeout a stalled server would block the call forever.
    """
    # With stream=True the connection stays open until the body is consumed;
    # the context manager releases it even if writing the file fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print(f"Failed to download {url}")
            return
        with open(save_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

# Recursive function to scrape and download
def scrape_and_download(base_url, save_dir):
    """Mirror a remote directory listing into ``save_dir``, recursing into folders.

    The page at ``base_url`` is parsed for ``<a>`` links. A link ending in
    ``/`` is treated as a sub-folder (created locally and crawled
    recursively); any other link is treated as a file and downloaded.

    Only links that resolve to a direct child of ``base_url`` are followed.
    Sort links (``?C=N;O=D``), anchors, parent-directory links and links to
    other sites are skipped, so the crawl cannot loop back upwards or write
    outside ``save_dir``.

    Args:
        base_url (str): URL of the directory listing. A trailing ``/`` is
            appended when missing so relative links resolve inside it.
        save_dir (str): Existing local directory that receives the content.
    """
    # urljoin resolves relative links against the directory only when the
    # URL ends in "/"; otherwise they would land in the parent directory.
    if not base_url.endswith('/'):
        base_url += '/'

    response = requests.get(base_url, timeout=60)
    if response.status_code != 200:
        # Do not parse an error page as if it were a listing.
        print(f"Failed to list {base_url}")
        return
    soup = BeautifulSoup(response.text, 'html.parser')

    for link in soup.find_all('a'):
        href = link.get('href')
        if not href or href in ['../', './']:  # Skip parent and current directory links
            continue

        full_url = urljoin(base_url, href)

        # Follow only what lies strictly below this listing; this drops absolute
        # parent links and external links that would otherwise recurse forever.
        if full_url == base_url or not full_url.startswith(base_url):
            continue
        relative = full_url[len(base_url):]

        # Column-sort links and anchors point back at this same listing.
        if '?' in relative or '#' in relative:
            continue

        name = relative.strip('/')
        # A listing entry is a single path segment; anything containing a
        # separator could place the local file outside save_dir.
        if not name or '/' in name or '\\' in name:
            continue

        local_path = os.path.join(save_dir, name)
        if relative.endswith('/'):  # It's a folder
            create_directory(local_path)
            print(f"Entering folder: {name}")
            scrape_and_download(full_url, local_path)
        else:  # It's a file
            print(f"Downloading file: {name}")
            download_file(full_url, local_path)

# Main execution
if __name__ == "__main__":
    # Local folder (relative to the working directory) that mirrors the remote tree.
    save_directory = "jarkus_profielen"
    create_directory(save_directory)
    scrape_and_download(BASE_URL, save_directory)


Entering folder: Jrk_Ameland
Downloading file: amel00jk.jrk
Downloading file: amel01jk.jrk
Downloading file: amel02jk.jrk
Downloading file: amel03jk.jrk
Downloading file: amel04jk.jrk
Downloading file: amel05jk.jrk
Downloading file: amel06jk.jrk
Downloading file: amel07jk.jrk
Downloading file: amel08jk.jrk
Downloading file: amel09jk.jrk
Downloading file: amel10jk.jrk
Downloading file: amel11jk.jrk
Downloading file: amel12jk.jrk
Downloading file: amel13jk.jrk
Downloading file: amel14jk.jrk
Downloading file: amel15jk.jrk
Downloading file: amel16jk.jrk
Downloading file: amel17jk.jrk
Downloading file: amel18jk.jrk
Downloading file: amel19jk.jrk
Downloading file: amel20jk.jrk
Downloading file: amel21jk.jrk
Downloading file: amel22jk.jrk
Downloading file: amel23jk.jrk
Downloading file: amel24jk.jrk
Downloading file: amel65jk.jrk
Downloading file: amel66jk.jrk
Downloading file: amel67jk.jrk
Downloading file: amel68jk.jrk
Downloading file: amel69jk.jrk
Downloading file: amel70jk.jrk
Downloadin

In [4]:
import os
import shutil

def copy_files_with_extension(source_dir, target_dir, file_extension="24jk.jrk"):
    """
    Explore a folder and its subfolders, and copy all files whose name ends
    with a given suffix into a single (flat) target folder.

    Files that already sit directly in ``target_dir`` are left alone. This
    allows the target to be the source folder itself, or a folder inside it,
    and makes the function safe to run repeatedly (copying a file onto
    itself would raise ``shutil.SameFileError``).

    Note: the sub-folder structure is not preserved, so two matching files
    with the same name in different sub-folders overwrite each other.

    Args:
        source_dir (str): Path to the source directory to explore.
        target_dir (str): Path to the target directory where files will be copied.
        file_extension (str): Suffix a file name must end with
            (default is "24jk.jrk").
    """
    os.makedirs(target_dir, exist_ok=True)

    def _canonical(path):
        # Resolve symlinks/relative parts and normalise case so the same
        # folder compares equal however it was spelled.
        return os.path.normcase(os.path.realpath(path))

    target_key = _canonical(target_dir)

    for root, _, files in os.walk(source_dir):
        # Anything found here is already in the target folder; sub-folders
        # below it are still visited by os.walk.
        if _canonical(root) == target_key:
            continue
        for file in files:
            if file.endswith(file_extension):
                source_file = os.path.join(root, file)
                target_file = os.path.join(target_dir, file)
                shutil.copy(source_file, target_file)
                print(f"Copied: {source_file} -> {target_file}")

# Example usage
# NOTE: source and target point at the same folder, so matching files found in the
# sub-folders are copied up into that top-level folder itself.
# The paths are machine-specific absolute paths; adjust them before running elsewhere.
source_folder = r'C:\Users\fuentesm\Projects\Coastviewer\Data\Input\Geodata\jarkus_profielen' # Root folder that is searched recursively
target_folder = r'C:\Users\fuentesm\Projects\Coastviewer\Data\Input\Geodata\jarkus_profielen' # Folder that receives the copies
copy_files_with_extension(source_folder, target_folder)

Copied: C:\Users\fuentesm\Projects\Coastviewer\Data\Input\Geodata\jarkus_profielen\Jrk_Ameland\amel24jk.jrk -> C:\Users\fuentesm\Projects\Coastviewer\Data\Input\Geodata\jarkus_profielen\amel24jk.jrk
Copied: C:\Users\fuentesm\Projects\Coastviewer\Data\Input\Geodata\jarkus_profielen\Jrk_Delfland\delf24jk.jrk -> C:\Users\fuentesm\Projects\Coastviewer\Data\Input\Geodata\jarkus_profielen\delf24jk.jrk
Copied: C:\Users\fuentesm\Projects\Coastviewer\Data\Input\Geodata\jarkus_profielen\Jrk_Goeree\goer24jk.jrk -> C:\Users\fuentesm\Projects\Coastviewer\Data\Input\Geodata\jarkus_profielen\goer24jk.jrk
Copied: C:\Users\fuentesm\Projects\Coastviewer\Data\Input\Geodata\jarkus_profielen\Jrk_Maasvlakte\msvl24jk.jrk -> C:\Users\fuentesm\Projects\Coastviewer\Data\Input\Geodata\jarkus_profielen\msvl24jk.jrk
Copied: C:\Users\fuentesm\Projects\Coastviewer\Data\Input\Geodata\jarkus_profielen\Jrk_Noord_Beveland\nbev24jk.jrk -> C:\Users\fuentesm\Projects\Coastviewer\Data\Input\Geodata\jarkus_profielen\nbev24jk

In [5]:
import os

def rename_files_based_on_reference(source_dir_2023, target_dir_2024,
                                    reference_year="23", target_year="24"):
    """
    Rename files in target_dir_2024 by adding the prefix carried by the
    corresponding file in source_dir_2023.

    A reference name such as "03-amel23jk.jrk" yields prefix "03" and key
    "amel" (the text before ``reference_year``). A target file such as
    "amel24jk.jrk" has key "amel" (the text before ``target_year``) and is
    renamed to "03-amel24jk.jrk".

    Reference files without a "-" separator are ignored, target files with
    no matching key are skipped, and an existing destination file is never
    overwritten. Each decision is reported with ``print``.

    Args:
        source_dir_2023 (str): Path to the folder containing the reference file names.
        target_dir_2024 (str): Path to the folder containing files to be renamed.
        reference_year (str): Non-empty marker that ends the key in reference
            names (default "23").
        target_year (str): Non-empty marker that ends the key in target
            names (default "24").
    """
    # Get sorted lists of filenames from both folders
    filenames_2023 = sorted(os.listdir(source_dir_2023))
    filenames_2024 = sorted(os.listdir(target_dir_2024))

    # Create a mapping from the key of each reference file to its prefix
    prefix_mapping = {}
    for name in filenames_2023:
        # "03-amel23jk.jrk" -> prefix "03", remainder "amel23jk.jrk"
        prefix, separator, remainder = name.partition('-')
        if not separator:
            # Unpacking name.split('-', 1) would raise ValueError here.
            print(f"No prefix found in reference file {name}. Ignoring.")
            continue
        key = remainder.split(reference_year)[0]  # e.g. "amel" from "amel23jk.jrk"
        prefix_mapping[key] = prefix

    # Rename files in the target folder
    for old_name in filenames_2024:
        key = old_name.split(target_year)[0]  # e.g. "amel" from "amel24jk.jrk"
        if key not in prefix_mapping:
            print(f"No matching prefix found for {old_name}. Skipping.")
            continue

        new_name = f"{prefix_mapping[key]}-{old_name}"
        old_path = os.path.join(target_dir_2024, old_name)
        new_path = os.path.join(target_dir_2024, new_name)

        # os.rename silently replaces an existing file on POSIX and raises on
        # Windows; refuse explicitly so behaviour is the same everywhere.
        if os.path.exists(new_path):
            print(f"Target {new_name} already exists. Skipping {old_name}.")
            continue

        os.rename(old_path, new_path)
        print(f"Renamed: {old_name} -> {new_name}")

# Example usage
# The paths are machine-specific absolute paths; adjust both before running elsewhere.
source_folder_2023 = r"C:\Users\fuentesm\Projects\Coastviewer\Tools\Jarkus\raw\annual\2023" # Folder whose prefixed file names serve as the reference
target_folder_2024 = r"C:\Users\fuentesm\Projects\Coastviewer\Tools\Jarkus\raw\annual\2024" # Folder whose files are renamed in place
rename_files_based_on_reference(source_folder_2023, target_folder_2024)


Renamed: amel24jk.jrk -> 03-amel24jk.jrk
Renamed: delf24jk.jrk -> 09-delf24jk.jrk
Renamed: goer24jk.jrk -> 12-goer24jk.jrk
Renamed: msvl24jk.jrk -> 10-msvl24jk.jrk
Renamed: nbev24jk.jrk -> 15-nbev24jk.jrk
Renamed: nhol24jk.jrk -> 07-nhol24jk.jrk
Renamed: osch24jk.jrk -> 14-osch24jk.jrk
Renamed: rnld24jk.jrk -> 08-rnld24jk.jrk
Renamed: rotm24jk.jrk -> 01-rotm24jk.jrk
Renamed: schr24jk.jrk -> 02-schr24jk.jrk
Renamed: schw24jk.jrk -> 13-schw24jk.jrk
Renamed: ters24jk.jrk -> 04-ters24jk.jrk
Renamed: texl24jk.jrk -> 06-texl24jk.jrk
Renamed: vlie24jk.jrk -> 05-vlie24jk.jrk
Renamed: vrne24jk.jrk -> 11-vrne24jk.jrk
Renamed: walc24jk.jrk -> 16-walc24jk.jrk
Renamed: zwvl24jk.jrk -> 17-zwvl24jk.jrk
