400+ Java Interview Questions and Answers blog

400+ Java Interview Questions and Answers blog


Java writing code -- compare two CSV files in Java

Posted: 01 Jan 2013 04:53 PM PST

Q. How will you go about writing code for a scenario where you need to compare two CSV files? Assume that both CSV files have already been converted to a "List" of rows (e.g. List<String[]>), one for the target file and one for the generated file. Duplicate entries are allowed, and you need to list the differences between the target file and the generated file.

A.  You can use a CSV framework like OpenCSV to convert a CSV file to a List. The method below takes these lists and compares their values.

Possible scenarios are

  • Target and generated files could have a different number of records
  • Target and generated files could have the same number of records but the content might be different
  • Generated file could have a few rows removed ( this is treated as record deleted)
  • Generated file could have a few new rows added ( this is treated as record inserted)
  • With or without duplicate records


One approach to doing this would be to use Java's Set interface (e.g. a HashSet) and then do a removeAll() with the target set on the generated set, thus retaining only the rows which differ. This, of course, assumes that there are no duplicate rows in the files.


   // using FileUtils to read in the files.  HashSet<String[]> target = new HashSet<String[]>();  //...populate target via OpenCSV  HashSet<String[]> generated = new HashSet<String[]>();  //...populate generated via OpenCSV  generated.removeAll(target); // generated now contains only the lines which are not in target  


The above solution would not work if duplicates are allowed. Here is one possible solution for when there are duplicates.

     package com.myapp.compare;    import java.util.ArrayList;  import java.util.Arrays;  import java.util.Collections;  import java.util.Comparator;  import java.util.List;    import org.springframework.stereotype.Component;    @Component(value = "simpleCSVCompare")  public class SimpleCSVCompareImpl implements SimpleCSVCompare {        //compare target and generated CSV lines   public CSVCompareResult compareCSVLines(List<String[]> targetLines, List<String[]> generatedLines, CSVCompareResult result){        //Step1: Number of lines differ    if(targetLines.size() != generatedLines.size()){     result.addMismatchingLines("Line sizes don't match: " + " target=" + targetLines.size() + ", generated=" + generatedLines.size());     result.setStatus(CSVCompareResult.FileCompareStatus.UNMATCHED);    }        //Step 2: Contents differ    if(targetLines.size() == generatedLines.size()){     for (int i = 0; i < targetLines.size(); i++) {      String[] targetLine = targetLines.get(i);      String[] genLine = generatedLines.get(i);            if(!Arrays.deepEquals(targetLine, genLine)){       result.addMismatchingLines("Line contents don't match.");       result.setStatus(CSVCompareResult.FileCompareStatus.UNMATCHED);       break;      }      }    }        //Step 3: Identify the differing lines    if(CSVCompareResult.FileCompareStatus.UNMATCHED == result.getStatus()){     sortedList(targetLines);     sortedList(generatedLines);     evaluateCSVLineDifferences(targetLines,generatedLines,result);    }        return result;       }      public CSVCompareResult evaluateCSVLineDifferences(List<String[]> targetLines, List<String[]> generatedLines, CSVCompareResult result) {      result.setNoOfGeneratedLines(generatedLines.size());    result.setNoOfTargetLines(targetLines.size());      int genIndex = 0;    int targIndex = 0;      String[] lineTarget = targetLines.get(targIndex);    String[] lineGen = generatedLines.get(genIndex);      boolean targetDone = false;    boolean 
generatedDone = false;      while (!targetDone && !generatedDone) {       //target line is less than the generated line     if (Arrays.deepToString(lineTarget).compareTo(Arrays.deepToString(lineGen)) < 0) {      while (Arrays.deepToString(lineTarget).compareTo(Arrays.deepToString(lineGen)) < 0 && !targetDone) {       result.addMismatchingLines("TARGET:" + Arrays.deepToString(lineTarget));       if (targIndex < targetLines.size() - 1) {        lineTarget = targetLines.get(++targIndex);       } else {        targetDone = true;       }      }       //target and generated lines are same      } else if (Arrays.deepToString(lineTarget).compareTo(Arrays.deepToString(lineGen)) == 0) {      if (targIndex < targetLines.size() - 1) {       lineTarget = targetLines.get(++targIndex);      } else {       targetDone = true;      }      if (genIndex < generatedLines.size() - 1) {       lineGen = generatedLines.get(++genIndex);      } else {       generatedDone = true;      }       //target line is greater than the generated line      } else if (Arrays.deepToString(lineTarget).compareTo(Arrays.deepToString(lineGen)) > 0) {      while (Arrays.deepToString(lineTarget).compareTo(Arrays.deepToString(lineGen)) > 0 && !generatedDone) {       result.addMismatchingLines("GENERATED:" + Arrays.deepToString(lineGen));       if (genIndex < generatedLines.size() - 1) {        lineGen = generatedLines.get(++genIndex);       } else {        generatedDone = true;       }      }     }      }      //process any target lines not processed    while (!targetDone) {     result.addMismatchingLines("TARGET:" + Arrays.deepToString(lineTarget));     if (targIndex < targetLines.size() - 1) {      lineTarget = targetLines.get(++targIndex);     } else {      targetDone = true;     }    }      //process any generated lines not processed    while (!generatedDone) {     result.addMismatchingLines("GENERATED:" + Arrays.deepToString(lineGen));     if (genIndex < generatedLines.size() - 1) {      lineGen = 
generatedLines.get(++genIndex);     } else {      generatedDone = true;     }    }      return result;   }         public void sortedList(List<String[]> input){     Collections.sort(input, new Comparator<String[]>() {       @Override     public int compare(String[] o1, String[] o2) {      return Arrays.deepToString(o1).compareTo(Arrays.deepToString(o2));     }    });        }     public static void main(String[] args) {    String[] targA1 = { "a1" };    String[] genA1 = { "a1" };      String[] targA2 = { "a2" };    String[] genA2 = { "a2" };      String[] targA3 = { "a3" };    String[] genA3 = { "a3" };      String[] targA4 = { "a4" };    String[] genA4 = { "a4" };      String[] targA5 = { "a5" };      String[] genA6 = { "a6" };      List<String[]> targetLines = new ArrayList<String[]>();    List<String[]> generatedLines = new ArrayList<String[]>();      targetLines.add(targA1);    targetLines.add(targA2);    targetLines.add(targA2);    targetLines.add(targA3);    targetLines.add(targA4);    targetLines.add(targA5);      generatedLines.add(genA1);    generatedLines.add(genA2);    generatedLines.add(genA3);    generatedLines.add(genA4);    generatedLines.add(genA6);      CSVCompareResult result = new CSVCompareResult();      new SimpleCSVCompareImpl().evaluateCSVLineDifferences(targetLines, generatedLines, result);      System.out.println(result.getMismatchingLines());     }    }    

The results can be added to a value object like

   package com.myapp.compare;    import java.util.ArrayList;  import java.util.List;    public class CSVCompareResult {     public enum FileCompareStatus {    MATCHED, UNMATCHED   };     private String generatedFileName;   private String targetFileName;   private int noOfTargetLines;   private int noOfGeneratedLines;   private FileCompareStatus status = FileCompareStatus.MATCHED;     private List<String> mismatchingLines = new ArrayList<String>(20);     public String getGeneratedFileName() {    return generatedFileName;   }     public void setGeneratedFileName(String generatedFileName) {    this.generatedFileName = generatedFileName;   }     public String getTargetFileName() {    return targetFileName;   }     public void setTargetFileName(String targetFileName) {    this.targetFileName = targetFileName;   }     public int getNoOfTargetLines() {    return noOfTargetLines;   }     public void setNoOfTargetLines(int noOfTargetLines) {    this.noOfTargetLines = noOfTargetLines;   }     public int getNoOfGeneratedLines() {    return noOfGeneratedLines;   }     public void setNoOfGeneratedLines(int noOfGeneratedLines) {    this.noOfGeneratedLines = noOfGeneratedLines;   }     public List<String> getMismatchingLines() {    return mismatchingLines;   }     public void setMismatchingLines(List<String> mismatchingLineNumbers) {    this.mismatchingLines = mismatchingLineNumbers;   }     public void addMismatchingLines(String lineNumber) {    mismatchingLines.add(lineNumber);   }     public FileCompareStatus getStatus() {    return status;   }     public void setStatus(FileCompareStatus status) {    this.status = status;   }      public String outputResultsAsString(){    StringBuilder sb = new StringBuilder();    sb.append("Files Compared: " + " target=" + targetFileName + ", generated=" + generatedFileName);    sb.append("\n");    sb.append("Status:" + status);    sb.append("\n");        List<String> mismatchingLines = getMismatchingLines();        for (String msg : 
mismatchingLines) {     sb.append(msg);     sb.append("\n");    }        return sb.toString();   }    }    




Post a Comment