Je suis certain que ce problème peut être résolu assez facilement, mais j'ai du mal à en trouver la cause. Mon code lit simplement tous les mots d'un fichier, puis stocke dans un tableau chaque mot, sa position, ainsi que le début et la fin de sa phrase. Le tableau est ensuite écrit dans un autre fichier texte. Problème lors de la lecture d'un fichier : la fin de fichier (EOF) semble être atteinte prématurément.
Je peux lire toutes les informations jusqu'à la dernière phrase, puis un bug survient. Des idées ?
/**
* Programmer: fryeguy
* Course:
* Program: TxtCrawl for MicroSearch
*
* Algorithm:
* TxtCrawl is the component of MicroSearch that reads text
* documents for search terms and stores them for
* indexing
*
* 1. Count words in doc, then initialize
* wordsFromDoc array to wordCount
* 2. Initiate output file for writing.
* 3. Open input file for reading words.
* 4. Until reaching EOF:
* 4.a. Set value for start "get pointer" in startSentence (.tellg()).
* 4.b. Store value for end "get pointer" in endSentence (.tellg()).
* 4.c. Reset "get pointer" to startSentence location.
* 4.d. Until reaching endSentence, Read into the
* array theWord, wordPos, startSent, and endSent
* 5. Write wordsFromDoc array to file
* 6. When EOF is reached close the files.
*/
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
/**
 * Record kept for each word taken from the document; main() collects
 * these records and writes one output line per record.
 */
struct wordProps
{
    /** Text of the word. */
    string theWord;

    /** Position of the word, taken from the input stream's get pointer. */
    int wordPos;

    /** Get-pointer value at the start of the sentence containing the word. */
    int startSent;

    /** Get-pointer value at the end of the sentence containing the word. */
    int endSent;
};
// Forward declaration; the definition follows main(). Arguments, in order:
// the name of the file to scan, the character counter and the word counter.
// Both counters are taken by reference and incremented by the function.
void countWords(string, int&, int&);
int main()
{
ifstream iFile; // file stream for reading in data
ofstream oFile; // file stream for writing data
string iFileName = "TextFile2.txt"; // name of test file to read from
string oFileName = "OutputFile.txt"; // name of test file to write to
string aLine = ""; // stores a line preceeding a newline character (\n)
string aWord = ""; // stores words from doc for indexing
int charCount = 0; // count of characters in doc
int wordCount = 0; // count of words in doc
int aLineWordCount = 0; // count of words in a single line being processed
int wordBegin = 0; // stores location of word in doc
int startSentence = 0; // stores pointer value for start of sentence
int endSentence = 0; // stores pointer value for end of sentence
/**
* 1. Count words in doc, then initialize
* wordsFromDoc array to wordCount
*/
countWords(iFileName, charCount, wordCount);
cout << "charCount: " << charCount << endl; // DEBUG CODE
cout << "wordCount: " << wordCount << endl; // DEBUG CODE
wordProps wordsFromDoc[wordCount];
cout<< "length of array: " << (sizeof(wordsFromDoc)/sizeof(*wordsFromDoc)) << endl; // DEBUG CODE
/**
* 2. Initiate output file for writing
*/
oFile.open (oFileName.c_str()); // setup output file and write header
oFile << setw(20) << left << "File Name: " << iFileName << endl;
oFile << setw(20) << "---------------------------------------" << endl << endl;
/**
* 3. Open input file for reading words
*/
iFile.open (iFileName.c_str());
if (!iFile.is_open())
cout << "No such file exists!" << endl;
else
{
/**
* 4. Until reaching EOF:
*/
// I have been attempting different counting methods assuming the eof was being reached prematurely
// The results really have not varied with this code
// while (iFile.tellg() != charCount)
while (!iFile.eof())
{
//cout << "count: " << count << endl;
/**
* 4.a. Set value for start "get pointer" in startSentence (.tellg()).
*/
startSentence = iFile.tellg();
cout << "startSentence: " << startSentence << endl; // DEBUG CODE
/**
* 4.b. Store value for end "get pointer" in endSentence (.tellg()).
*/
getline(iFile, aLine, '.');
cout << aLine << endl; // DEBUG CODE
endSentence = iFile.tellg();
aLine.clear();
cout << "endSentence: " << endSentence << endl; // DEBUG CODE
if (!iFile.is_open())
{
cout << "The if, iFile.tellg(): " << iFile.tellg() << endl; // DEBUG CODE
iFile.close();
iFile.open (iFileName.c_str());
}
/**
* 4.c. Reset "get pointer" to startSentence location.
*/
iFile.seekg(startSentence);
cout << "iFile.tellg(): " << iFile.tellg() << endl; // DEBUG CODE
/**
* 4.d. Until reaching endSentence, Read into the
* array theWord, wordPos, startSent, and endSent
*/
// As the last line is about to be read there is an error of some sort.
// My guess is that somehow I exceed the end of the file but my startSentence
// and endSentence variables are pointing where I think they should.
for (; iFile.tellg() < endSentence; aLineWordCount++)
{
wordsFromDoc[aLineWordCount].wordPos = iFile.tellg();
cout << "wordPos: " << wordsFromDoc[aLineWordCount].wordPos << endl; // DEBUG CODE
iFile >> wordsFromDoc[aLineWordCount].theWord;
cout << "theWord: " << wordsFromDoc[aLineWordCount].theWord << endl; // DEBUG CODE
wordsFromDoc[aLineWordCount].startSent = startSentence;
cout << "startSent: " << wordsFromDoc[aLineWordCount].startSent << endl; // DEBUG CODE
wordsFromDoc[aLineWordCount].endSent = endSentence;
cout << "endSent: " << wordsFromDoc[aLineWordCount].endSent << endl << endl; // DEBUG CODE
cout << "aLineWordCount: " << aLineWordCount << endl;
} // end for
} // end while !=iFile.eof
// THIS section of code is never reached because of the hang up above.
/**
* 5. Write wordsFromDoc array to file
*/
for (int count = 0; count < aLineWordCount; count++)
{
oFile << setw(20) << left
<< wordsFromDoc[count].theWord << " "
<< wordsFromDoc[count].wordPos << " "
<< wordsFromDoc[count].startSent << " "
<< wordsFromDoc[count].endSent << endl;
}
} // end else
/**
* 6. When EOF is reached close the files.
*/
iFile.close();
oFile.close();
// DEBUG CDODE for verifying results
// for (int count = 0; count < wordCount; count++) {
// cout << "theWord: " << wordsFromDoc[count].theWord << endl;
// cout << "wordPos: " << wordsFromDoc[count].wordPos << endl;
// cout << "startSent: " << wordsFromDoc[count].startSent << endl;
// cout << "endSent: " << wordsFromDoc[count].endSent << endl << endl;
// }
}
/**
 * Count the characters and the whitespace-separated words in a text file.
 *
 * The counts are added to whatever charCount and wordCount already hold,
 * so callers normally pass zero-initialised counters. If the file cannot
 * be opened, "No such file exists!" is printed once to standard output and
 * both counters are left untouched.
 *
 * @param theFileName name of the file to scan
 * @param charCount   incremented once per character read with get()
 * @param wordCount   incremented once per word extracted with operator>>
 */
void countWords(string theFileName, int &charCount, int &wordCount)
{
    // Read-only stream: a default-mode fstream asks for write access too
    // and therefore fails to open files that are not writable.
    ifstream inFile(theFileName.c_str());
    if (!inFile.is_open())
    {
        cout << "No such file exists!" << endl;
        return;
    }

    // Count the chars. The loop tests the result of the read itself, so
    // the failed read at the end of the file is never counted.
    char theChar = ' ';
    while (inFile.get(theChar))
        charCount++;

    // Rewind for the second pass; the end-of-file state has to be cleared
    // explicitly before the stream can be read again.
    inFile.clear();
    inFile.seekg(0, ios::beg);

    // Count the words. Testing the extraction (instead of eof() before it)
    // avoids counting one phantom word when the file is empty or ends in
    // whitespace.
    string theWord;
    while (inFile >> theWord)
        wordCount++;
}
Il faudrait voir l'entrée pour laquelle le programme semble échouer ; il semble fonctionner correctement pour une entrée arbitraire. – frayser
Merci pour la réponse. J'ai testé avec différents fichiers et ce texte produit l'erreur attendue (indésirable) : "Ceci est un échantillon de texte à lire dans le moteur de recherche, je vais taper plusieurs phrases, y compris des périodes pour prévoir des pauses. vient après deux caractères de nouvelle ligne Un dernier morceau de texte devrait faire! " Lorsque le code atteint la dernière phrase, j'ai lu les valeurs de startSent et endSent ainsi que wordPos (iFile.tellg()), et il semble que iFile soit libéré. – fryeguy
Faites-vous référence à un échec dû à une phrase qui se termine par un point d'exclamation (!) au lieu d'un point ? Le code est écrit strictement pour gérer uniquement les phrases se terminant par un point. – frayser