Automating the normalization and consolidation of daily CSV files from different sources into a standardized format with Unix shell scripting
2 min read · Jul 14, 2024
#!/bin/bash
# Configuration for the CSV normalization script.
#
# The three paths may be supplied through the environment (INPUT_DIR,
# OUTPUT_DIR, LOG_FILE); the placeholder defaults below are used otherwise.
# All values are marked readonly so later code cannot reassign them by accident.
readonly INPUT_DIR="${INPUT_DIR:-/path/to/input}"
readonly OUTPUT_DIR="${OUTPUT_DIR:-/path/to/output}"
readonly LOG_FILE="${LOG_FILE:-/path/to/log_file.log}"
# Define the standard CSV header
readonly STANDARD_HEADER="Date,ID,Value,Description"
#######################################
# Normalize a single CSV file into the standard column layout.
#
# The input's first line is compared against the known source headers. For a
# recognized header, the standard header is written to the output followed by
# the first four comma-separated fields of every data row.
#
# Globals:
#   STANDARD_HEADER (read)  - header line written to the output file
#   LOG_FILE (appended)     - receives one timestamped line per outcome
# Arguments:
#   $1 - path of the CSV file to read
#   $2 - path of the normalized CSV file to write (overwritten)
# Returns:
#   0 on success; 1 if the input cannot be read, its header is not
#   recognized, or the output cannot be written.
#######################################
normalize_csv() {
  local input_file=$1
  local output_file=$2
  local header

  # Read the first line (header) of the input file
  if ! header=$(head -n 1 -- "$input_file"); then
    echo "$(date): Cannot read $input_file" >> "$LOG_FILE"
    return 1
  fi
  # Files with CRLF line endings carry a trailing CR on the header; drop it
  # so they are matched like their LF counterparts.
  header=${header%$'\r'}

  # Both known source layouts already have their columns in the standard
  # order, so they share one transformation.
  case "$header" in
    "date,id,value,desc" | "timestamp,identifier,amount,description") ;;
    *)
      # Log an error for unrecognized format
      echo "$(date): Unrecognized format in $input_file" >> "$LOG_FILE"
      return 1
      ;;
  esac

  # Write the header first and the rows after it. Prepending afterwards with
  # `sed -i 1s/...` silently does nothing when there are no data rows, because
  # an empty file has no line 1 to edit.
  if ! {
    printf '%s\n' "$STANDARD_HEADER"
    # Input comes via redirection so an odd file name can never be parsed
    # by awk as an option or a var=value operand.
    awk -F',' 'NR > 1 { sub(/\r$/, ""); print $1 "," $2 "," $3 "," $4 }' \
      < "$input_file"
  } > "$output_file"; then
    echo "$(date): Failed to write $output_file" >> "$LOG_FILE"
    return 1
  fi

  echo "$(date): Successfully processed $input_file" >> "$LOG_FILE"
}
# Process each CSV file in the input directory.
# Create the output directory up front: without it every redirect into
# "$OUTPUT_DIR" would fail and each file would be reported as a failure.
if ! mkdir -p -- "$OUTPUT_DIR"; then
  echo "$(date): Cannot create output directory $OUTPUT_DIR" >> "$LOG_FILE"
  exit 1
fi

for input_file in "$INPUT_DIR"/*.csv; do
  # When nothing matches, the glob stays as the literal pattern; skip it
  # (and anything else that is not a regular file) instead of logging a
  # bogus failure.
  [[ -f "$input_file" ]] || continue

  # Keep the original file name, placed under the output directory.
  output_file="$OUTPUT_DIR/$(basename -- "$input_file")"
  if ! normalize_csv "$input_file" "$output_file"; then
    echo "$(date): Failed to process $input_file" >> "$LOG_FILE"
  fi
done
Option 2: Consolidate the processed files into a single CSV
#!/bin/bash
# Consolidate the output of process_csv.sh for every CSV file in an input
# directory into one file. process_csv.sh is resolved through PATH and is
# expected to print the processed data on stdout and exit non-zero on failure.

# Input directory containing CSV files
input_dir="/path/to/input"
# Output file for consolidated data
output_file="normalized_data.csv"

# Start from an empty output file so the first processed file effectively
# overwrites any previous run, and every later file appends to it.
: > "$output_file" || exit 1

# Loop through CSV files
for file in "$input_dir"/*.csv; do
  # An unmatched glob is left as the literal pattern; skip non-files.
  [[ -f "$file" ]] || continue
  # Never feed the consolidated file back into itself if it happens to live
  # in the input directory.
  if [[ "$file" -ef "$output_file" ]]; then
    continue
  fi

  # Process each CSV, checking the exit status of the processor directly
  if ! processed_data=$(process_csv.sh "$file"); then
    echo "Error processing $file, skipping..." >&2
    continue
  fi

  # Nothing to add for this file; in particular, do not touch what has
  # already been consolidated.
  [[ -n "$processed_data" ]] || continue

  # Append processed data to output file
  printf '%s\n' "$processed_data" >> "$output_file"
done
echo "Consolidated data saved to: $output_file"