400+ Java Interview Questions and Answers blog |
Java writing code -- compare two CSV files in Java Posted: 01 Jan 2013 04:53 PM PST Q. How will you go about writing code for a scenario where you need to compare two CSV files? Assume that both CSV files are converted to "List<String[]>". A. You can use a CSV framework like OpenCSV to convert a CSV file to List<String[]>.
// using FileUtils to read in the files.
// NOTE: String[] inherits identity-based equals()/hashCode() from Object, so a
// HashSet<String[]> never treats two rows holding the same cells as equal and
// removeAll() would remove nothing. Each row is therefore wrapped in a
// List<String> (e.g. Arrays.asList(row)), which compares element by element.
Set<List<String>> target = new HashSet<List<String>>();
//...populate target via OpenCSV, adding Arrays.asList(row) for every String[] row
Set<List<String>> generated = new HashSet<List<String>>();
//...populate generated via OpenCSV, adding Arrays.asList(row) for every String[] row
generated.removeAll(target); // generated now contains only the lines which are not in target

// The above solution would not work if duplicates are allowed. Here is one
// possible solution when there are duplicates.

package com.myapp.compare;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.springframework.stereotype.Component;

/**
 * Compares the rows of a "target" CSV file with the rows of a "generated" CSV
 * file, where every row is a {@code String[]} of cell values. Duplicate rows
 * are supported.
 */
@Component(value = "simpleCSVCompare")
public class SimpleCSVCompareImpl implements SimpleCSVCompare {

    /** Orders rows by the text produced by {@link Arrays#deepToString(Object[])}. */
    private static final Comparator<String[]> ROW_ORDER = new Comparator<String[]>() {
        @Override
        public int compare(String[] o1, String[] o2) {
            return Arrays.deepToString(o1).compareTo(Arrays.deepToString(o2));
        }
    };

    /**
     * Compares target and generated CSV lines and records the outcome in {@code result}.
     *
     * <p>The lists are first compared positionally. If the sizes differ, or any row
     * differs at the same index, the status is set to {@code UNMATCHED} and a summary
     * message is added. Whenever the status is {@code UNMATCHED} (including when it
     * already was on entry), sorted copies of both lists are walked to list the
     * individual rows that exist on only one side. Two lists holding the same rows in
     * a different order are therefore reported as {@code UNMATCHED} without any
     * per-row entries.
     *
     * <p>The lists passed in are not modified; sorting is done on copies.
     *
     * @param targetLines    rows of the expected file
     * @param generatedLines rows of the produced file
     * @param result         accumulator that receives status, counts and messages
     * @return the same {@code result} instance
     */
    public CSVCompareResult compareCSVLines(List<String[]> targetLines, List<String[]> generatedLines,
            CSVCompareResult result) {

        if (targetLines.size() != generatedLines.size()) {
            // Step 1: number of lines differ
            result.addMismatchingLines("Line sizes don't match: " + " target=" + targetLines.size()
                    + ", generated=" + generatedLines.size());
            result.setStatus(CSVCompareResult.FileCompareStatus.UNMATCHED);
        } else {
            // Step 2: same number of lines, so check the contents row by row
            for (int i = 0; i < targetLines.size(); i++) {
                if (!Arrays.deepEquals(targetLines.get(i), generatedLines.get(i))) {
                    result.addMismatchingLines("Line contents don't match.");
                    result.setStatus(CSVCompareResult.FileCompareStatus.UNMATCHED);
                    break; // one positional mismatch is enough to trigger step 3
                }
            }
        }

        // Step 3: identify the differing lines
        if (CSVCompareResult.FileCompareStatus.UNMATCHED == result.getStatus()) {
            // Sort copies so the caller's lists keep their original order.
            List<String[]> sortedTarget = new ArrayList<String[]>(targetLines);
            List<String[]> sortedGenerated = new ArrayList<String[]>(generatedLines);
            sortedList(sortedTarget);
            sortedList(sortedGenerated);
            evaluateCSVLineDifferences(sortedTarget, sortedGenerated, result);
        }

        return result;
    }

    /**
     * Walks two row lists in step and records every row that appears on only one side.
     *
     * <p>Both lists must already be ordered the way {@link #sortedList(List)} orders
     * them; the walk is a sorted merge and gives meaningless results otherwise. Rows
     * only in the target list are recorded as {@code "TARGET:" + row}, rows only in
     * the generated list as {@code "GENERATED:" + row}. Empty lists are allowed.
     * The status of {@code result} is not changed by this method.
     *
     * @param targetLines    sorted rows of the expected file
     * @param generatedLines sorted rows of the produced file
     * @param result         accumulator that receives the line counts and messages
     * @return the same {@code result} instance
     */
    public CSVCompareResult evaluateCSVLineDifferences(List<String[]> targetLines,
            List<String[]> generatedLines, CSVCompareResult result) {

        result.setNoOfGeneratedLines(generatedLines.size());
        result.setNoOfTargetLines(targetLines.size());

        // Render every row once; the rendered text is both the sort key and the
        // text that is reported.
        List<String> targetKeys = toKeys(targetLines);
        List<String> generatedKeys = toKeys(generatedLines);

        int targIndex = 0;
        int genIndex = 0;

        while (targIndex < targetKeys.size() && genIndex < generatedKeys.size()) {
            String lineTarget = targetKeys.get(targIndex);
            String lineGen = generatedKeys.get(genIndex);
            int order = lineTarget.compareTo(lineGen);

            if (order < 0) {
                // target line is less than the generated line: it has no counterpart
                result.addMismatchingLines("TARGET:" + lineTarget);
                targIndex++;
            } else if (order > 0) {
                // target line is greater than the generated line
                result.addMismatchingLines("GENERATED:" + lineGen);
                genIndex++;
            } else {
                // target and generated lines are same
                targIndex++;
                genIndex++;
            }
        }

        // process any target lines not processed
        for (; targIndex < targetKeys.size(); targIndex++) {
            result.addMismatchingLines("TARGET:" + targetKeys.get(targIndex));
        }

        // process any generated lines not processed
        for (; genIndex < generatedKeys.size(); genIndex++) {
            result.addMismatchingLines("GENERATED:" + generatedKeys.get(genIndex));
        }

        return result;
    }

    /** Renders each row with {@link Arrays#deepToString(Object[])}, preserving order. */
    private static List<String> toKeys(List<String[]> lines) {
        List<String> keys = new ArrayList<String>(lines.size());
        for (String[] line : lines) {
            keys.add(Arrays.deepToString(line));
        }
        return keys;
    }

    /**
     * Sorts the given rows in place by their {@link Arrays#deepToString(Object[])} text.
     *
     * @param input rows to sort; the list itself is reordered
     */
    public void sortedList(List<String[]> input) {
        Collections.sort(input, ROW_ORDER);
    }

    /** Small demo: prints the rows that exist on only one side of two sample lists. */
    public static void main(String[] args) {
        String[] targA1 = { "a1" };
        String[] genA1 = { "a1" };
        String[] targA2 = { "a2" };
        String[] genA2 = { "a2" };
        String[] targA3 = { "a3" };
        String[] genA3 = { "a3" };
        String[] targA4 = { "a4" };
        String[] genA4 = { "a4" };
        String[] targA5 = { "a5" };
        String[] genA6 = { "a6" };

        List<String[]> targetLines = new ArrayList<String[]>();
        List<String[]> generatedLines = new ArrayList<String[]>();

        targetLines.add(targA1);
        targetLines.add(targA2);
        targetLines.add(targA2); // duplicate row on the target side
        targetLines.add(targA3);
        targetLines.add(targA4);
        targetLines.add(targA5);

        generatedLines.add(genA1);
        generatedLines.add(genA2);
        generatedLines.add(genA3);
        generatedLines.add(genA4);
        generatedLines.add(genA6);

        CSVCompareResult result = new CSVCompareResult();
        new SimpleCSVCompareImpl().evaluateCSVLineDifferences(targetLines, generatedLines, result);
        System.out.println(result.getMismatchingLines());
    }
}

// The results can be added to a value object like

package com.myapp.compare;

import java.util.ArrayList;
import java.util.List;

/**
 * Mutable value object holding the outcome of a CSV comparison: the file names,
 * the line counts, an overall status and a list of mismatch messages.
 */
public class CSVCompareResult {

    /** Overall outcome of a comparison. */
    public enum FileCompareStatus {
        MATCHED, UNMATCHED
    }

    private String generatedFileName;
    private String targetFileName;
    private int noOfTargetLines;
    private int noOfGeneratedLines;
    /** Starts as MATCHED and stays so until a caller sets it otherwise. */
    private FileCompareStatus status = FileCompareStatus.MATCHED;
    private List<String> mismatchingLines = new ArrayList<String>(20);

    public String getGeneratedFileName() {
        return generatedFileName;
    }

    public void setGeneratedFileName(String generatedFileName) {
        this.generatedFileName = generatedFileName;
    }

    public String getTargetFileName() {
        return targetFileName;
    }

    public void setTargetFileName(String targetFileName) {
        this.targetFileName = targetFileName;
    }

    public int getNoOfTargetLines() {
        return noOfTargetLines;
    }

    public void setNoOfTargetLines(int noOfTargetLines) {
        this.noOfTargetLines = noOfTargetLines;
    }

    public int getNoOfGeneratedLines() {
        return noOfGeneratedLines;
    }

    public void setNoOfGeneratedLines(int noOfGeneratedLines) {
        this.noOfGeneratedLines = noOfGeneratedLines;
    }

    /** Returns the live list of mismatch messages (not a copy). */
    public List<String> getMismatchingLines() {
        return mismatchingLines;
    }

    /** Replaces the list of mismatch messages with the given list. */
    public void setMismatchingLines(List<String> mismatchingLineNumbers) {
        this.mismatchingLines = mismatchingLineNumbers;
    }

    /** Appends one mismatch message. */
    public void addMismatchingLines(String lineNumber) {
        mismatchingLines.add(lineNumber);
    }

    public FileCompareStatus getStatus() {
        return status;
    }

    public void setStatus(FileCompareStatus status) {
        this.status = status;
    }

    /**
     * Builds a multi-line report: the compared file names, the status, then one
     * line per mismatch message. Every line ends with {@code "\n"}.
     *
     * @return the report text
     */
    public String outputResultsAsString() {
        StringBuilder sb = new StringBuilder();

        sb.append("Files Compared: ").append(" target=").append(targetFileName)
                .append(", generated=").append(generatedFileName).append("\n");
        sb.append("Status:").append(status).append("\n");

        for (String msg : getMismatchingLines()) {
            sb.append(msg).append("\n");
        }

        return sb.toString();
    }
}
You are subscribed to email updates from 400+ Java Interview Questions and Answers blog To stop receiving these emails, you may unsubscribe now. | Email delivery powered by Google |
Google Inc., 20 West Kinzie, Chicago IL USA 60610 |
Post a Comment