import components.set.Set; import components.set.Set1L; import components.simplereader.SimpleReader; import components.simplereader.SimpleReader1L; import components.simplewriter.SimpleWriter; import components.simplewriter.SimpleWriter1L; /** * Utility class to support string reassembly from fragments. * * @author Put your name here * * @mathdefinitions
*
* OVERLAPS (
* s1: string of character,
* s2: string of character,
* k: integer
* ) : boolean is
* 0 <= k and k <= |s1| and k <= |s2| and
* s1[|s1|-k, |s1|) = s2[0, k)
*
* SUBSTRINGS (
* strSet: finite set of string of character,
* s: string of character
* ) : finite set of string of character is
* {t: string of character
* where (t is in strSet and t is substring of s)
* (t)}
*
* SUPERSTRINGS (
* strSet: finite set of string of character,
* s: string of character
* ) : finite set of string of character is
* {t: string of character
* where (t is in strSet and s is substring of t)
* (t)}
*
* CONTAINS_NO_SUBSTRING_PAIRS (
* strSet: finite set of string of character
* ) : boolean is
* for all t: string of character
* where (t is in strSet)
* (SUBSTRINGS(strSet \ {t}, t) = {})
*
* ALL_SUPERSTRINGS (
* strSet: finite set of string of character
* ) : set of string of character is
* {t: string of character
* where (SUBSTRINGS(strSet, t) = strSet)
* (t)}
*
* CONTAINS_NO_OVERLAPPING_PAIRS (
* strSet: finite set of string of character
* ) : boolean is
* for all t1, t2: string of character, k: integer
* where (t1 /= t2 and t1 is in strSet and t2 is in strSet and
* 1 <= k and k <= |s1| and k <= |s2|)
* (not OVERLAPS(s1, s2, k))
*
*
*/
public final class StringReassembly {
/**
* Private no-argument constructor to prevent instantiation of this utility
* class.
*/
private StringReassembly() {
}
/**
* Reports the maximum length of a common suffix of {@code str1} and prefix
* of {@code str2}.
*
* @param str1
* first string
* @param str2
* second string
* @return maximum overlap between right end of {@code str1} and left end of
* {@code str2}
* @requires
* str1 is not substring of str2 and
* str2 is not substring of str1
*
* @ensures
* OVERLAPS(str1, str2, overlap) and
* for all k: integer
* where (overlap < k and k <= |str1| and k <= |str2|)
* (not OVERLAPS(str1, str2, k))
*
*/
public static int overlap(String str1, String str2) {
assert str1 != null : "Violation of: str1 is not null";
assert str2 != null : "Violation of: str2 is not null";
assert str2.indexOf(str1) < 0 : "Violation of: "
+ "str1 is not substring of str2";
assert str1.indexOf(str2) < 0 : "Violation of: "
+ "str2 is not substring of str1";
/*
* Start with maximum possible overlap and work down until a match is
* found; think about it and try it on some examples to see why
* iterating in the other direction doesn't work
*/
int maxOverlap = str2.length() - 1;
while (!str1.regionMatches(str1.length() - maxOverlap, str2, 0,
maxOverlap)) {
maxOverlap--;
}
return maxOverlap;
}
/**
* Returns concatenation of {@code str1} and {@code str2} from which one of
* the two "copies" of the common string of {@code overlap} characters at
* the end of {@code str1} and the beginning of {@code str2} has been
* removed.
*
* @param str1
* first string
* @param str2
* second string
* @param overlap
* amount of overlap
* @return combination with one "copy" of overlap removed
* @requires OVERLAPS(str1, str2, overlap)
* @ensures combination = str1[0, |str1|-overlap) * str2
*/
public static String combination(String str1, String str2, int overlap) {
assert str1 != null : "Violation of: str1 is not null";
assert str2 != null : "Violation of: str2 is not null";
assert 0 <= overlap
&& overlap <= str1.length()
&& overlap <= str2.length()
&& str1.regionMatches(str1.length() - overlap, str2, 0, overlap) : ""
+ "Violation of: OVERLAPS(str1, str2, overlap)";
/*
* Hint: consider using substring (a String method)
*/
// TODO: fill in body
/*
* This line added just to make the program compilable. Should be
* replaced with appropriate return statement.
*/
return "";
}
/**
* Adds {@code str} to {@code strSet} if and only if it is not a substring
* of any string already in {@code strSet}; and if it is added, also removes
* from {@code strSet} any string already in {@code strSet} that is a
* substring of {@code str}.
*
* @param strSet
* set to consider adding to
* @param str
* string to consider adding
* @updates strSet
* @requires CONTAINS_NO_SUBSTRING_PAIRS(strSet)
* @ensures
* if SUPERSTRINGS(#strSet, str) = {}
* then strSet = #strSet union {str} \ SUBSTRINGS(#strSet, str)
* else strSet = #strSet
*
*/
public static void addToSetAvoidingSubstrings(Set
* input.is_open and input.content = <> and
* linesFromInput = [maximal set of lines from #input.content such that
* CONTAINS_NO_SUBSTRING_PAIRS(linesFromInput)]
*
*/
public static Set
* CONTAINS_NO_SUBSTRING_PAIRS(strSet) and
* bestTwo.length >= 2
*
* @ensures
* bestTwo[0] is in strSet and
* bestTwo[1] is in strSet and
* OVERLAPS(bestTwo[0], bestTwo[1], bestOverlap) and
* for all str1, str2: string of character, overlap: integer
* where (str1 is in strSet and str2 is in strSet and
* OVERLAPS(str1, str2, overlap))
* (overlap <= bestOverlap)
*
*/
private static int bestOverlap(Set
* ALL_SUPERSTRINGS(strSet) is subset of ALL_SUPERSTRINGS(#strSet) and
* |strSet| <= |#strSet| and
* CONTAINS_NO_SUBSTRING_PAIRS(strSet) and
* CONTAINS_NO_OVERLAPPING_PAIRS(strSet)
*
*/
public static void assemble(Set
* out.is_open and
* out.content = #out.content *
* [text with each '~' replaced by line separator]
*
*/
public static void printWithLineSeparators(String text, SimpleWriter out) {
assert text != null : "Violation of: text is not null";
assert out != null : "Violation of: out is not null";
assert out.isOpen() : "Violation of: out.is_open";
// TODO: fill in body
}
/**
* Given a file name (relative to the path where the application is running)
* that contains fragments of a single original source text, one fragment
* per line, outputs to stdout the result of trying to reassemble the
* original text from those fragments using a "greedy assembler". The
* result, if reassembly is complete, might be the original text; but this
* might not happen because a greedy assembler can make a mistake and end up
* predicting the fragments were from a string other than the true original
* source text. It can also end up with two or more fragments that are
* mutually non-overlapping, in which case it outputs the remaining
* fragments, appropriately labelled.
*
* @param args
* Command-line arguments: not used
*/
public static void main(String[] args) {
SimpleReader in = new SimpleReader1L();
SimpleWriter out = new SimpleWriter1L();
/*
* Get input file name
*/
out.print("Input file (with fragments): ");
String inputFileName = in.nextLine();
SimpleReader inFile = new SimpleReader1L(inputFileName);
/*
* Get initial fragments from input file
*/
Set