Group B PR 3 DSBDA
Group B PR 3 DSBDA
Locate a dataset (e.g., sample_weather.txt) of weather data and write a program that reads the
text input files and finds the average temperature, dew point and wind speed.
Step 3:
nano ~/.bashrc
export HADOOP_HOME=~/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
export HADOOP_CLASSPATH=$JAVA_HOME/lib/tools.jar
hadoop version
WeatherAnalysis/
├── WeatherMapper.java
├── WeatherReducer.java
├── WeatherAnalysisDriver.java
├── sample_weather.txt
📁 WeatherMapper.java
WeatherMapper.java
/*
○ ("Temperature", 28.5)
○ ("DewPoint", 20.0)
○ ("WindSpeed", 5.6)
🔁 Logic:
1. Skip header (if (!line.contains("Date")))
○ fields[1] → Temperature
*/
import java.io.IOException;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Turns one comma-separated weather line into three (metric name, reading) pairs.
 *
 * <p>A line produces no output when it contains the text "Date" (treated as the header),
 * when it has fewer than four comma-separated fields, or when any of fields 1-3 is not a
 * parsable float. Otherwise the method writes "Temperature" (field 1), "DewPoint"
 * (field 2) and "WindSpeed" (field 3), in that order.
 *
 * <p>NOTE(review): {@code metric} and {@code valueOut} are declared outside this method;
 * presumably they are reusable Text / FloatWritable fields of the enclosing class - confirm.
 *
 * @param key     input key; not read by this method
 * @param value   one line of input text
 * @param context receives each emitted (metric, value) pair
 * @throws IOException          declared for {@code context.write}
 * @throws InterruptedException declared for {@code context.write}
 */
public void map(LongWritable key, Text value, Context context) throws IOException,
        InterruptedException {
    String record = value.toString();
    if (record.contains("Date")) {
        return; // header row
    }

    String[] columns = record.split(",");
    if (columns.length < 4) {
        return; // not enough columns to hold all three readings
    }

    String[] labels = {"Temperature", "DewPoint", "WindSpeed"};
    float[] readings = new float[labels.length];
    try {
        // Parse every reading before emitting anything, so one bad value drops the whole line.
        for (int i = 0; i < labels.length; i++) {
            readings[i] = Float.parseFloat(columns[i + 1]);
        }
        for (int i = 0; i < labels.length; i++) {
            metric.set(labels[i]);
            valueOut.set(readings[i]);
            context.write(metric, valueOut);
        }
    } catch (NumberFormatException e) {
        // Ignore malformed lines
    }
}
}
WeatherReducer.java
/*Calculate average for each metric.
🔧 Code Breakdown:
● Input: For each key like "Temperature", it gets all values (e.g., all temperature readings).
○ ("Temperature", 26.2)
🔁 Logic:
● Sum all float values.
● Count them.
● Divide the sum by the count to get the average for that metric.
*/
import java.io.IOException;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
result.set(sum / count);
context.write(key, result);
}
}
WeatherAnalysisDriver.java
/*
Component Purpose
*/
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
job.setJarByClass(WeatherAnalysisDriver.class);
job.setMapperClass(WeatherMapper.class);
job.setReducerClass(WeatherReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FloatWritable.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
sample_weather.txt
Date,Temperature,DewPoint,WindSpeed
2024-04-01,28.5,20.0,5.6
2024-04-02,30.0,22.5,6.0
2024-04-03,29.0,21.0,5.8
2024-04-04,27.5,19.5,4.9
2024-04-05,31.0,23.0,6.3
2024-04-06,26.0,18.5,4.5
✅ Step-by-Step Fix
🟡 1. Make sure you're in the correct folder
You should be inside the folder where your .java files are located.
ls
cat output/part-r-00000