Automating the normalization and consolidation of daily CSV files from different sources into a standardized format with Unix shell scripting

Biswanath Giri
2 min read · Jul 14, 2024

--

#!/bin/bash

# Locations used by the script. Each path may be overridden from the
# environment (for example: INPUT_DIR=/data/in ./script.sh); the placeholder
# value is used when the variable is unset or empty.
INPUT_DIR="${INPUT_DIR:-/path/to/input}"
OUTPUT_DIR="${OUTPUT_DIR:-/path/to/output}"
LOG_FILE="${LOG_FILE:-/path/to/log_file.log}"

# Header line written at the top of every normalized CSV file
STANDARD_HEADER="Date,ID,Value,Description"

# Function to normalize a single CSV file
normalize_csv() {
local input_file=$1
local output_file=$2

# Read the first line (header) of the input file
header=$(head -n 1 "$input_file")

# Determine the format and apply appropriate transformations
if [[ "$header" == "date,id,value,desc" ]]; then
# Transform the CSV file using awk, sed, and cut
tail -n +2 "$input_file" | awk -F',' '{print $1","$2","$3","$4}' > "$output_file"
elif [[ "$header" == "timestamp,identifier,amount,description" ]]; then
# Transform the CSV file using awk, sed, and cut
tail -n +2 "$input_file" | awk -F',' '{print $1","$2","$3","$4}' > "$output_file"
else
# Log an error for unrecognized format
echo "$(date): Unrecognized format in $input_file" >> "$LOG_FILE"
return 1
fi

# Prepend the standard header to the output file
sed -i "1s/^/$STANDARD_HEADER\n/" "$output_file"

echo "$(date): Successfully processed $input_file" >> "$LOG_FILE"
}

# Process each CSV file in the input directory.
# The output directory is created up front so every per-file write has a
# place to land; without it no file can be written, so the run stops.
if ! mkdir -p -- "$OUTPUT_DIR"; then
  echo "$(date): Cannot create output directory $OUTPUT_DIR" >> "$LOG_FILE"
  exit 1
fi

for input_file in "$INPUT_DIR"/*.csv; do
  # When nothing matches, bash leaves the pattern itself in the list; skip it
  # (and anything that is not a regular file) instead of logging a bogus
  # failure for a file literally named "*.csv".
  [[ -f "$input_file" ]] || continue

  # Same file name as the input, placed in the output directory
  output_file="$OUTPUT_DIR/${input_file##*/}"
  if ! normalize_csv "$input_file" "$output_file"; then
    echo "$(date): Failed to process $input_file" >> "$LOG_FILE"
  fi
done

Option 2: Consolidate all processed CSV files into one output file

#!/bin/bash
#
# Consolidate the processed form of every CSV file in input_dir into a single
# output file. Each file is handed to process_csv.sh (looked up through PATH);
# whatever that command prints on stdout is appended to the output file.

# Input directory containing CSV files
input_dir="/path/to/input"

# Output file for consolidated data
output_file="normalized_data.csv"

# Start from an empty output file so results from an earlier run are not
# mixed in and the loop below can always append.
if ! true > "$output_file"; then
  echo "Cannot write to $output_file" >&2
  exit 1
fi

# Loop through CSV files
for file in "$input_dir"/*.csv; do
  # An unmatched glob is left as the literal pattern; skip it.
  [[ -f "$file" ]] || continue

  # Process each CSV; a non-zero exit status means the file is skipped.
  if ! processed_data=$(process_csv.sh "$file"); then
    echo "Error processing $file, skipping..." >&2
    continue
  fi

  # A file that produced no output contributes nothing (and must not
  # overwrite what has been collected so far).
  [[ -n "$processed_data" ]] || continue

  # Append processed data to output file
  printf '%s\n' "$processed_data" >> "$output_file"
done

echo "Consolidated data saved to: $output_file"

--

--

Biswanath Giri
Biswanath Giri

Written by Biswanath Giri

Cloud & AI Architect | Empowering People in Cloud Computing, Google Cloud AI/ML, and Google Workspace | Enabling Businesses on Their Cloud Journey

No responses yet